Message ID | 20221219052128.18190-1-zhouzhouyi@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [linux-next] mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE | expand |
On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote: > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug > reported by KCSAN. > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com> This does indeed get rid of some KCSAN complaints, thank you!!! Tested-by: Paul E. McKenney <paulmck@kernel.org> > --- > During the rcutorture test on linux-next, > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan --kcsan-kmake-arg "CC=clang-12" > following KCSAN BUG is reported: > [ 35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M > [ 35.400593] ^M > [ 35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M > [ 35.405325] tick_nohz_idle_stop_tick+0x14c/0x3e0^M > [ 35.407162] do_idle+0xf3/0x2a0^M > [ 35.408016] cpu_startup_entry+0x15/0x20^M > [ 35.409084] start_secondary+0x8f/0x90^M > [ 35.410207] secondary_startup_64_no_verify+0xe1/0xeb^M > [ 35.411607] ^M > [ 35.412042] no locks held by swapper/3/0.^M > [ 35.413172] irq event stamp: 53048^M > [ 35.414175] hardirqs last enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M > [ 35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M > [ 35.418988] softirqs last enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > [ 35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > [ 35.423685] ^M > [ 35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M > [ 35.425870] tick_nohz_next_event+0x233/0x2b0^M > [ 35.427119] tick_nohz_idle_stop_tick+0x8f/0x3e0^M > [ 35.428386] do_idle+0xf3/0x2a0^M > [ 35.429265] cpu_startup_entry+0x15/0x20^M > [ 35.430429] rest_init+0x20c/0x210^M > [ 35.431382] arch_call_rest_init+0xe/0x10^M > [ 35.432508] start_kernel+0x544/0x600^M > [ 35.433519] secondary_startup_64_no_verify+0xe1/0xeb^M > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE > > Thanks > Zhouyi > -- > kernel/time/tick-common.c | 16 ++++++++-------- > kernel/time/tick-sched.c | 28 +++++++++++++++------------- > 2 files changed, 23 insertions(+), 21 deletions(-) > > diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c > index 46789356f856..a7753465a0be 100644 > --- a/kernel/time/tick-common.c > +++ b/kernel/time/tick-common.c > @@ -84,7 +84,7 @@ int tick_is_oneshot_available(void) > */ > static void tick_periodic(int cpu) > { > - if (tick_do_timer_cpu == cpu) { > + if (READ_ONCE(tick_do_timer_cpu) == cpu) { > raw_spin_lock(&jiffies_lock); > write_seqcount_begin(&jiffies_seq); > > @@ -184,9 +184,9 @@ static void giveup_do_timer(void *info) > { > int cpu = *(unsigned int *)info; > > - WARN_ON(tick_do_timer_cpu != smp_processor_id()); > + WARN_ON(READ_ONCE(tick_do_timer_cpu) != smp_processor_id()); > > - tick_do_timer_cpu = cpu; > + WRITE_ONCE(tick_do_timer_cpu, cpu); > } > > static void tick_take_do_timer_from_boot(void) > @@ -217,8 +217,8 @@ static void tick_setup_device(struct tick_device *td, > * If no cpu took the do_timer update, assign it to > * this cpu: > */ > - if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { > - tick_do_timer_cpu = cpu; > + if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) { > + WRITE_ONCE(tick_do_timer_cpu, cpu); > > tick_next_period = ktime_get(); > #ifdef CONFIG_NO_HZ_FULL > @@ -235,7 +235,7 @@ static void tick_setup_device(struct tick_device *td, > !tick_nohz_full_cpu(cpu)) { > tick_take_do_timer_from_boot(); > tick_do_timer_boot_cpu = -1; > - WARN_ON(tick_do_timer_cpu != cpu); > + WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu); > #endif > } > > @@ -407,8 +407,8 @@ EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); > */ > void tick_handover_do_timer(void) > { > - if (tick_do_timer_cpu == smp_processor_id()) > - tick_do_timer_cpu = cpumask_first(cpu_online_mask); > + if (READ_ONCE(tick_do_timer_cpu) == smp_processor_id()) > + WRITE_ONCE(tick_do_timer_cpu, cpumask_first(cpu_online_mask)); > } > > /* > diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c > index b0e3c9205946..2dac94b3187f 100644 > --- a/kernel/time/tick-sched.c > +++ b/kernel/time/tick-sched.c > @@ -186,16 +186,16 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) > * If nohz_full is enabled, this should not happen because the > * tick_do_timer_cpu never relinquishes. > */ > - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { > + if (unlikely(READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_NONE)) { > #ifdef CONFIG_NO_HZ_FULL > WARN_ON_ONCE(tick_nohz_full_running); > #endif > - tick_do_timer_cpu = cpu; > + WRITE_ONCE(tick_do_timer_cpu, cpu); > } > #endif > > /* Check, if the jiffies need an update */ > - if (tick_do_timer_cpu == cpu) > + if (READ_ONCE(tick_do_timer_cpu) == cpu) > tick_do_update_jiffies64(now); > > /* > @@ -534,7 +534,7 @@ static int tick_nohz_cpu_down(unsigned int cpu) > * timers, workqueues, timekeeping, ...) on behalf of full dynticks > * CPUs. It must remain online when nohz full is enabled. > */ > - if (tick_nohz_full_running && tick_do_timer_cpu == cpu) > + if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu) > return -EBUSY; > return 0; > } > @@ -849,8 +849,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) > * Otherwise we can sleep as long as we want. > */ > delta = timekeeping_max_deferment(); > - if (cpu != tick_do_timer_cpu && > - (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last)) > + if (cpu != READ_ONCE(tick_do_timer_cpu) && > + (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE > + || !ts->do_timer_last)) > delta = KTIME_MAX; > > /* Calculate the next expiry time */ > @@ -883,10 +884,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) > * do_timer() never invoked. Keep track of the fact that it > * was the one which had the do_timer() duty last. > */ > - if (cpu == tick_do_timer_cpu) { > - tick_do_timer_cpu = TICK_DO_TIMER_NONE; > + if (cpu == READ_ONCE(tick_do_timer_cpu)) { > + WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE); > ts->do_timer_last = 1; > - } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { > + } else if (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE) { > ts->do_timer_last = 0; > } > > @@ -1049,8 +1050,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) > * invoked. > */ > if (unlikely(!cpu_online(cpu))) { > - if (cpu == tick_do_timer_cpu) > - tick_do_timer_cpu = TICK_DO_TIMER_NONE; > + if (cpu == READ_ONCE(tick_do_timer_cpu)) > + WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE); > /* > * Make sure the CPU doesn't get fooled by obsolete tick > * deadline if it comes back online later. > @@ -1073,11 +1074,12 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) > * Keep the tick alive to guarantee timekeeping progression > * if there are full dynticks CPUs around > */ > - if (tick_do_timer_cpu == cpu) > + if (READ_ONCE(tick_do_timer_cpu) == cpu) > return false; > > /* Should not happen for nohz-full */ > - if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) > + if (WARN_ON_ONCE(READ_ONCE(tick_do_timer_cpu) > + == TICK_DO_TIMER_NONE)) > return false; > } > > -- > 2.34.1 >
On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote: > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug > reported by KCSAN. > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com> > --- > During the rcutorture test on linux-next, > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan --kcsan-kmake-arg "CC=clang-12" > following KCSAN BUG is reported: > [ 35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M > [ 35.400593] ^M > [ 35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M > [ 35.405325] tick_nohz_idle_stop_tick+0x14c/0x3e0^M > [ 35.407162] do_idle+0xf3/0x2a0^M > [ 35.408016] cpu_startup_entry+0x15/0x20^M > [ 35.409084] start_secondary+0x8f/0x90^M > [ 35.410207] secondary_startup_64_no_verify+0xe1/0xeb^M > [ 35.411607] ^M > [ 35.412042] no locks held by swapper/3/0.^M > [ 35.413172] irq event stamp: 53048^M > [ 35.414175] hardirqs last enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M > [ 35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M > [ 35.418988] softirqs last enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > [ 35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > [ 35.423685] ^M > [ 35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M > [ 35.425870] tick_nohz_next_event+0x233/0x2b0^M > [ 35.427119] tick_nohz_idle_stop_tick+0x8f/0x3e0^M > [ 35.428386] do_idle+0xf3/0x2a0^M > [ 35.429265] cpu_startup_entry+0x15/0x20^M > [ 35.430429] rest_init+0x20c/0x210^M > [ 35.431382] arch_call_rest_init+0xe/0x10^M > [ 35.432508] start_kernel+0x544/0x600^M > [ 35.433519] secondary_startup_64_no_verify+0xe1/0xeb^M > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE This has been discussed before with passion: http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/ To me data_race() would be more appropriate but that would need a changelog with proper analysis of the tick_do_timer_cpu state machine. One more thing on my TODO list, but feel free to beat me at it :-) Thanks.
On Mon, Jan 9, 2023 at 8:51 PM Frederic Weisbecker <frederic@kernel.org> wrote: > > On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote: > > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug > > reported by KCSAN. > > > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com> > > --- > > During the rcutorture test on linux-next, > > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan --kcsan-kmake-arg "CC=clang-12" > > following KCSAN BUG is reported: > > [ 35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M > > [ 35.400593] ^M > > [ 35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M > > [ 35.405325] tick_nohz_idle_stop_tick+0x14c/0x3e0^M > > [ 35.407162] do_idle+0xf3/0x2a0^M > > [ 35.408016] cpu_startup_entry+0x15/0x20^M > > [ 35.409084] start_secondary+0x8f/0x90^M > > [ 35.410207] secondary_startup_64_no_verify+0xe1/0xeb^M > > [ 35.411607] ^M > > [ 35.412042] no locks held by swapper/3/0.^M > > [ 35.413172] irq event stamp: 53048^M > > [ 35.414175] hardirqs last enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M > > [ 35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M > > [ 35.418988] softirqs last enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > [ 35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > [ 35.423685] ^M > > [ 35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M > > [ 35.425870] tick_nohz_next_event+0x233/0x2b0^M > > [ 35.427119] tick_nohz_idle_stop_tick+0x8f/0x3e0^M > > [ 35.428386] do_idle+0xf3/0x2a0^M > > [ 35.429265] cpu_startup_entry+0x15/0x20^M > > [ 35.430429] rest_init+0x20c/0x210^M > > [ 35.431382] arch_call_rest_init+0xe/0x10^M > > [ 35.432508] start_kernel+0x544/0x600^M > > [ 35.433519] secondary_startup_64_no_verify+0xe1/0xeb^M > > > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE > > This has been discussed before with passion: > > http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/ > Thank you for your guidance, yes, the discussion is very passionate ;-) and I learned a lot from the above thread ;-) > To me data_race() would be more appropriate but that would need a changelog with > proper analysis of the tick_do_timer_cpu state machine. I Agree, and also I learned to do more rigorous technology research. > > One more thing on my TODO list, but feel free to beat me at it :-) Thank you for your concern ;-) Thanks Zhouyi > > Thanks.
On Mon, Jan 09, 2023 at 01:51:29PM +0100, Frederic Weisbecker wrote: > On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote: > > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug > > reported by KCSAN. > > > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com> > > --- > > During the rcutorture test on linux-next, > > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan --kcsan-kmake-arg "CC=clang-12" > > following KCSAN BUG is reported: > > [ 35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M > > [ 35.400593] ^M > > [ 35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M > > [ 35.405325] tick_nohz_idle_stop_tick+0x14c/0x3e0^M > > [ 35.407162] do_idle+0xf3/0x2a0^M > > [ 35.408016] cpu_startup_entry+0x15/0x20^M > > [ 35.409084] start_secondary+0x8f/0x90^M > > [ 35.410207] secondary_startup_64_no_verify+0xe1/0xeb^M > > [ 35.411607] ^M > > [ 35.412042] no locks held by swapper/3/0.^M > > [ 35.413172] irq event stamp: 53048^M > > [ 35.414175] hardirqs last enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M > > [ 35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M > > [ 35.418988] softirqs last enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > [ 35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > [ 35.423685] ^M > > [ 35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M > > [ 35.425870] tick_nohz_next_event+0x233/0x2b0^M > > [ 35.427119] tick_nohz_idle_stop_tick+0x8f/0x3e0^M > > [ 35.428386] do_idle+0xf3/0x2a0^M > > [ 35.429265] cpu_startup_entry+0x15/0x20^M > > [ 35.430429] rest_init+0x20c/0x210^M > > [ 35.431382] arch_call_rest_init+0xe/0x10^M > > [ 35.432508] start_kernel+0x544/0x600^M > > [ 35.433519] secondary_startup_64_no_verify+0xe1/0xeb^M > > > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE > > This has been discussed before with passion: > > http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/ > > To me data_race() would be more appropriate but that would need a changelog with > proper analysis of the tick_do_timer_cpu state machine. Please also an analysis of why the compiler cannot do any destructive optimizations in this case. Maybe also comments. > One more thing on my TODO list, but feel free to beat me at it :-) I know that feeling! ;-) Thanx, Paul
On Mon, Jan 9, 2023 at 11:25 PM Paul E. McKenney <paulmck@kernel.org> wrote: > > On Mon, Jan 09, 2023 at 01:51:29PM +0100, Frederic Weisbecker wrote: > > On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote: > > > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug > > > reported by KCSAN. > > > > > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com> > > > --- > > > During the rcutorture test on linux-next, > > > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan --kcsan-kmake-arg "CC=clang-12" > > > following KCSAN BUG is reported: > > > [ 35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M > > > [ 35.400593] ^M > > > [ 35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M > > > [ 35.405325] tick_nohz_idle_stop_tick+0x14c/0x3e0^M > > > [ 35.407162] do_idle+0xf3/0x2a0^M > > > [ 35.408016] cpu_startup_entry+0x15/0x20^M > > > [ 35.409084] start_secondary+0x8f/0x90^M > > > [ 35.410207] secondary_startup_64_no_verify+0xe1/0xeb^M > > > [ 35.411607] ^M > > > [ 35.412042] no locks held by swapper/3/0.^M > > > [ 35.413172] irq event stamp: 53048^M > > > [ 35.414175] hardirqs last enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M > > > [ 35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M > > > [ 35.418988] softirqs last enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > > [ 35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > > [ 35.423685] ^M > > > [ 35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M > > > [ 35.425870] tick_nohz_next_event+0x233/0x2b0^M > > > [ 35.427119] tick_nohz_idle_stop_tick+0x8f/0x3e0^M > > > [ 35.428386] do_idle+0xf3/0x2a0^M > > > [ 35.429265] cpu_startup_entry+0x15/0x20^M > > > [ 35.430429] rest_init+0x20c/0x210^M > > > [ 35.431382] arch_call_rest_init+0xe/0x10^M > > > [ 35.432508] start_kernel+0x544/0x600^M > > > [ 35.433519] secondary_startup_64_no_verify+0xe1/0xeb^M > > > > > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE > > > > This has been discussed before with passion: > > > > http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/ > > > > To me data_race() would be more appropriate but that would need a changelog with > > proper analysis of the tick_do_timer_cpu state machine. > > Please also an analysis of why the compiler cannot do any destructive > optimizations in this case. Maybe also comments. I want to try the analysis above, as a newbie I have taught myself LLVM for 3 years in my spare time ;-) > > > One more thing on my TODO list, but feel free to beat me at it :-) Please take your time ;-) Please don't look my next possible email as a reminder ;-) > > I know that feeling! ;-) > Thanx, Zhouyi > Thanx, Paul
On Mon, Jan 09 2023 at 13:51, Frederic Weisbecker wrote: > On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote: >> fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE > > This has been discussed before with passion: > > http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/ > > To me data_race() would be more appropriate but that would need a changelog with > proper analysis of the tick_do_timer_cpu state machine. Proper analysis and comments in the code are required independent of READ/WRITE_ONCE() or data_race(). Thanks, tglx
On Mon, Jan 9, 2023 at 11:59 PM Zhouyi Zhou <zhouzhouyi@gmail.com> wrote: > > On Mon, Jan 9, 2023 at 11:25 PM Paul E. McKenney <paulmck@kernel.org> wrote: > > > > On Mon, Jan 09, 2023 at 01:51:29PM +0100, Frederic Weisbecker wrote: > > > On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote: > > > > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug > > > > reported by KCSAN. > > > > > > > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com> > > > > --- > > > > During the rcutorture test on linux-next, > > > > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan --kcsan-kmake-arg "CC=clang-12" > > > > following KCSAN BUG is reported: > > > > [ 35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M > > > > [ 35.400593] ^M > > > > [ 35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M > > > > [ 35.405325] tick_nohz_idle_stop_tick+0x14c/0x3e0^M > > > > [ 35.407162] do_idle+0xf3/0x2a0^M > > > > [ 35.408016] cpu_startup_entry+0x15/0x20^M > > > > [ 35.409084] start_secondary+0x8f/0x90^M > > > > [ 35.410207] secondary_startup_64_no_verify+0xe1/0xeb^M > > > > [ 35.411607] ^M > > > > [ 35.412042] no locks held by swapper/3/0.^M > > > > [ 35.413172] irq event stamp: 53048^M > > > > [ 35.414175] hardirqs last enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M > > > > [ 35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M > > > > [ 35.418988] softirqs last enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > > > [ 35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > > > [ 35.423685] ^M > > > > [ 35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M > > > > [ 35.425870] tick_nohz_next_event+0x233/0x2b0^M > > > > [ 35.427119] tick_nohz_idle_stop_tick+0x8f/0x3e0^M > > > > [ 35.428386] do_idle+0xf3/0x2a0^M > > > > [ 35.429265] cpu_startup_entry+0x15/0x20^M > > > > [ 35.430429] rest_init+0x20c/0x210^M > > > > [ 35.431382] arch_call_rest_init+0xe/0x10^M > > > > [ 35.432508] start_kernel+0x544/0x600^M > > > > [ 35.433519] secondary_startup_64_no_verify+0xe1/0xeb^M > > > > > > > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE > > > > > > This has been discussed before with passion: > > > > > > http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/ > > > > > > To me data_race() would be more appropriate but that would need a changelog with > > > proper analysis of the tick_do_timer_cpu state machine. > > > > Please also an analysis of why the compiler cannot do any destructive > > optimizations in this case. Maybe also comments. > I want to try the analysis above, as a newbie I have taught myself > LLVM for 3 years in my spare time ;-) I am continuing to study LLVM/Clang's optimization behaviour on tick_do_timer_cpu, it is very interesting and fascinating, but as a newbie, I still need 3-4 week to go, could you wait for me a little while? ;-) Many Thanks Zhouyi > > > > > One more thing on my TODO list, but feel free to beat me at it :-) > Please take your time ;-) Please don't look my next possible email > as a reminder ;-) > > > > I know that feeling! ;-) > > > Thanx, Zhouyi > > Thanx, Paul
On Mon, Jan 23, 2023 at 08:44:25AM +0800, Zhouyi Zhou wrote: > On Mon, Jan 9, 2023 at 11:59 PM Zhouyi Zhou <zhouzhouyi@gmail.com> wrote: > > > > On Mon, Jan 9, 2023 at 11:25 PM Paul E. McKenney <paulmck@kernel.org> wrote: > > > > > > On Mon, Jan 09, 2023 at 01:51:29PM +0100, Frederic Weisbecker wrote: > > > > On Mon, Dec 19, 2022 at 01:21:28PM +0800, Zhouyi Zhou wrote: > > > > > mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug > > > > > reported by KCSAN. > > > > > > > > > > Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com> > > > > > --- > > > > > During the rcutorture test on linux-next, > > > > > ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan --kcsan-kmake-arg "CC=clang-12" > > > > > following KCSAN BUG is reported: > > > > > [ 35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M > > > > > [ 35.400593] ^M > > > > > [ 35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M > > > > > [ 35.405325] tick_nohz_idle_stop_tick+0x14c/0x3e0^M > > > > > [ 35.407162] do_idle+0xf3/0x2a0^M > > > > > [ 35.408016] cpu_startup_entry+0x15/0x20^M > > > > > [ 35.409084] start_secondary+0x8f/0x90^M > > > > > [ 35.410207] secondary_startup_64_no_verify+0xe1/0xeb^M > > > > > [ 35.411607] ^M > > > > > [ 35.412042] no locks held by swapper/3/0.^M > > > > > [ 35.413172] irq event stamp: 53048^M > > > > > [ 35.414175] hardirqs last enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M > > > > > [ 35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M > > > > > [ 35.418988] softirqs last enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > > > > [ 35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M > > > > > [ 35.423685] ^M > > > > > [ 35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M > > > > > [ 35.425870] tick_nohz_next_event+0x233/0x2b0^M > > > > > [ 35.427119] tick_nohz_idle_stop_tick+0x8f/0x3e0^M > > > > > [ 35.428386] do_idle+0xf3/0x2a0^M > > > > > [ 35.429265] cpu_startup_entry+0x15/0x20^M > > > > > [ 35.430429] rest_init+0x20c/0x210^M > > > > > [ 35.431382] arch_call_rest_init+0xe/0x10^M > > > > > [ 35.432508] start_kernel+0x544/0x600^M > > > > > [ 35.433519] secondary_startup_64_no_verify+0xe1/0xeb^M > > > > > > > > > > fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE > > > > > > > > This has been discussed before with passion: > > > > > > > > http://archive.lwn.net:8080/linux-kernel/1C65422C-FFA4-4651-893B-300FAF9C49DE@lca.pw/T/ > > > > > > > > To me data_race() would be more appropriate but that would need a changelog with > > > > proper analysis of the tick_do_timer_cpu state machine. > > > > > > Please also an analysis of why the compiler cannot do any destructive > > > optimizations in this case. Maybe also comments. > > I want to try the analysis above, as a newbie I have taught myself > > LLVM for 3 years in my spare time ;-) > > I am continuing to study LLVM/Clang's optimization behaviour on > tick_do_timer_cpu, it is very interesting and fascinating, but as a > newbie, I still need 3-4 week to go, could you wait for me a little > while? ;-) Please do take the time to get it right and to get it correctly documented. ;-) Thanx, Paul > Many Thanks > Zhouyi > > > > > > > One more thing on my TODO list, but feel free to beat me at it :-) > > Please take your time ;-) Please don't look my next possible email > > as a reminder ;-) > > > > > > I know that feeling! ;-) > > > > > Thanx, Zhouyi > > > Thanx, Paul
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 46789356f856..a7753465a0be 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -84,7 +84,7 @@ int tick_is_oneshot_available(void) */ static void tick_periodic(int cpu) { - if (tick_do_timer_cpu == cpu) { + if (READ_ONCE(tick_do_timer_cpu) == cpu) { raw_spin_lock(&jiffies_lock); write_seqcount_begin(&jiffies_seq); @@ -184,9 +184,9 @@ static void giveup_do_timer(void *info) { int cpu = *(unsigned int *)info; - WARN_ON(tick_do_timer_cpu != smp_processor_id()); + WARN_ON(READ_ONCE(tick_do_timer_cpu) != smp_processor_id()); - tick_do_timer_cpu = cpu; + WRITE_ONCE(tick_do_timer_cpu, cpu); } static void tick_take_do_timer_from_boot(void) @@ -217,8 +217,8 @@ static void tick_setup_device(struct tick_device *td, * If no cpu took the do_timer update, assign it to * this cpu: */ - if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - tick_do_timer_cpu = cpu; + if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) { + WRITE_ONCE(tick_do_timer_cpu, cpu); tick_next_period = ktime_get(); #ifdef CONFIG_NO_HZ_FULL @@ -235,7 +235,7 @@ static void tick_setup_device(struct tick_device *td, !tick_nohz_full_cpu(cpu)) { tick_take_do_timer_from_boot(); tick_do_timer_boot_cpu = -1; - WARN_ON(tick_do_timer_cpu != cpu); + WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu); #endif } @@ -407,8 +407,8 @@ EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); */ void tick_handover_do_timer(void) { - if (tick_do_timer_cpu == smp_processor_id()) - tick_do_timer_cpu = cpumask_first(cpu_online_mask); + if (READ_ONCE(tick_do_timer_cpu) == smp_processor_id()) + WRITE_ONCE(tick_do_timer_cpu, cpumask_first(cpu_online_mask)); } /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b0e3c9205946..2dac94b3187f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -186,16 +186,16 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now) * If nohz_full is enabled, this should not happen because the * tick_do_timer_cpu never relinquishes. */ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) { + if (unlikely(READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_NONE)) { #ifdef CONFIG_NO_HZ_FULL WARN_ON_ONCE(tick_nohz_full_running); #endif - tick_do_timer_cpu = cpu; + WRITE_ONCE(tick_do_timer_cpu, cpu); } #endif /* Check, if the jiffies need an update */ - if (tick_do_timer_cpu == cpu) + if (READ_ONCE(tick_do_timer_cpu) == cpu) tick_do_update_jiffies64(now); /* @@ -534,7 +534,7 @@ static int tick_nohz_cpu_down(unsigned int cpu) * timers, workqueues, timekeeping, ...) on behalf of full dynticks * CPUs. It must remain online when nohz full is enabled. */ - if (tick_nohz_full_running && tick_do_timer_cpu == cpu) + if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu) return -EBUSY; return 0; } @@ -849,8 +849,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) * Otherwise we can sleep as long as we want. */ delta = timekeeping_max_deferment(); - if (cpu != tick_do_timer_cpu && - (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last)) + if (cpu != READ_ONCE(tick_do_timer_cpu) && + (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE + || !ts->do_timer_last)) delta = KTIME_MAX; /* Calculate the next expiry time */ @@ -883,10 +884,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) * do_timer() never invoked. Keep track of the fact that it * was the one which had the do_timer() duty last. */ - if (cpu == tick_do_timer_cpu) { - tick_do_timer_cpu = TICK_DO_TIMER_NONE; + if (cpu == READ_ONCE(tick_do_timer_cpu)) { + WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE); ts->do_timer_last = 1; - } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { + } else if (READ_ONCE(tick_do_timer_cpu) != TICK_DO_TIMER_NONE) { ts->do_timer_last = 0; } @@ -1049,8 +1050,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) * invoked. */ if (unlikely(!cpu_online(cpu))) { - if (cpu == tick_do_timer_cpu) - tick_do_timer_cpu = TICK_DO_TIMER_NONE; + if (cpu == READ_ONCE(tick_do_timer_cpu)) + WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE); /* * Make sure the CPU doesn't get fooled by obsolete tick * deadline if it comes back online later. @@ -1073,11 +1074,12 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around */ - if (tick_do_timer_cpu == cpu) + if (READ_ONCE(tick_do_timer_cpu) == cpu) return false; /* Should not happen for nohz-full */ - if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) + if (WARN_ON_ONCE(READ_ONCE(tick_do_timer_cpu) + == TICK_DO_TIMER_NONE)) return false; }
mark access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE to fix concurrency bug reported by KCSAN. Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com> --- During the rcutorture test on linux-next, ./tools/testing/selftests/rcutorture/bin/torture.sh --do-kcsan --kcsan-kmake-arg "CC=clang-12" following KCSAN BUG is reported: [ 35.397089] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event^M [ 35.400593] ^M [ 35.401377] write to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 3:^M [ 35.405325] tick_nohz_idle_stop_tick+0x14c/0x3e0^M [ 35.407162] do_idle+0xf3/0x2a0^M [ 35.408016] cpu_startup_entry+0x15/0x20^M [ 35.409084] start_secondary+0x8f/0x90^M [ 35.410207] secondary_startup_64_no_verify+0xe1/0xeb^M [ 35.411607] ^M [ 35.412042] no locks held by swapper/3/0.^M [ 35.413172] irq event stamp: 53048^M [ 35.414175] hardirqs last enabled at (53047): [<ffffffffb41f8404>] tick_nohz_idle_enter+0x104/0x140^M [ 35.416681] hardirqs last disabled at (53048): [<ffffffffb41229f1>] do_idle+0x91/0x2a0^M [ 35.418988] softirqs last enabled at (53038): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M [ 35.421347] softirqs last disabled at (53029): [<ffffffffb40bf21e>] __irq_exit_rcu+0x6e/0xc0^M [ 35.423685] ^M [ 35.424119] read to 0xffffffffb64b1270 of 4 bytes by task 0 on cpu 0:^M [ 35.425870] tick_nohz_next_event+0x233/0x2b0^M [ 35.427119] tick_nohz_idle_stop_tick+0x8f/0x3e0^M [ 35.428386] do_idle+0xf3/0x2a0^M [ 35.429265] cpu_startup_entry+0x15/0x20^M [ 35.430429] rest_init+0x20c/0x210^M [ 35.431382] arch_call_rest_init+0xe/0x10^M [ 35.432508] start_kernel+0x544/0x600^M [ 35.433519] secondary_startup_64_no_verify+0xe1/0xeb^M fix above bug by marking access to tick_do_timer_cpu with READ_ONCE/WRITE_ONCE Thanks Zhouyi -- kernel/time/tick-common.c | 16 ++++++++-------- kernel/time/tick-sched.c | 28 +++++++++++++++------------- 2 files changed, 23 insertions(+), 21 deletions(-)