diff mbox series

rcu/nocb: Fix the WARN_ON_ONCE() in rcu_nocb_rdp_deoffload()

Message ID 20241020125119.14751-1-qiang.zhang1211@gmail.com (mailing list archive)
State Superseded
Headers show
Series rcu/nocb: Fix the WARN_ON_ONCE() in rcu_nocb_rdp_deoffload() | expand

Commit Message

Zqiang Oct. 20, 2024, 12:51 p.m. UTC
Currently, running rcutorture test with torture_type=rcu fwd_progress=8
n_barrier_cbs=8 nocbs_nthreads=8 nocbs_toggle=100 onoff_interval=60
test_boost=2, will trigger the following warning:

WARNING: CPU: 19 PID: 100 at kernel/rcu/tree_nocb.h:1061 rcu_nocb_rdp_deoffload+0x292/0x2a0
RIP: 0010:rcu_nocb_rdp_deoffload+0x292/0x2a0
[18839.537322] Call Trace:
[18839.538006]  <TASK>
[18839.538596]  ? __warn+0x7e/0x120
[18839.539491]  ? rcu_nocb_rdp_deoffload+0x292/0x2a0
[18839.540757]  ? report_bug+0x18e/0x1a0
[18839.541805]  ? handle_bug+0x3d/0x70
[18839.542837]  ? exc_invalid_op+0x18/0x70
[18839.543959]  ? asm_exc_invalid_op+0x1a/0x20
[18839.545165]  ? rcu_nocb_rdp_deoffload+0x292/0x2a0
[18839.546547]  rcu_nocb_cpu_deoffload+0x70/0xa0
[18839.547814]  rcu_nocb_toggle+0x136/0x1c0
[18839.548960]  ? __pfx_rcu_nocb_toggle+0x10/0x10
[18839.550073]  kthread+0xd1/0x100
[18839.550958]  ? __pfx_kthread+0x10/0x10
[18839.552008]  ret_from_fork+0x2f/0x50
[18839.553002]  ? __pfx_kthread+0x10/0x10
[18839.553968]  ret_from_fork_asm+0x1a/0x30
[18839.555038]  </TASK>

CPU0                               CPU2                          CPU3
//rcu_nocb_toggle             //nocb_cb_wait                   //rcutorture

// deoffload CPU1             // process CPU1's rdp
rcu_barrier()
    rcu_segcblist_entrain()
        rcu_segcblist_add_len(1);
        // len == 2
        // enqueue barrier
        // callback to CPU1's
        // rdp->cblist
                             rcu_do_batch()
                                 // invoke CPU1's rdp->cblist
                                 // callback
                                 rcu_barrier_callback()
                                                             rcu_barrier()
                                                               mutex_lock(&rcu_state.barrier_mutex);
                                                               // still see len == 2
                                                               // enqueue barrier callback
                                                               // to CPU1's rdp->cblist
                                                               rcu_segcblist_entrain()
                                                                   rcu_segcblist_add_len(1);
                                                                   // len == 3
                                 // decrement len
                                 rcu_segcblist_add_len(-2);
                             kthread_parkme()

// CPU1's rdp->cblist len == 1
// Warn because there is
// still a pending barrier
// trigger warning
WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
cpus_read_unlock();

                                                                // wait for CPU1 to come online
                                                                // to invoke the barrier callback
                                                                // on CPU1's rdp->cblist
                                                                wait_for_completion(&rcu_state.barrier_completion);
// deoffload CPU4
cpus_read_lock()
  rcu_barrier()
    mutex_lock(&rcu_state.barrier_mutex);
    // block on barrier_mutex
    // wait rcu_barrier() on
    // CPU3 to unlock barrier_mutex
    // but CPU3 unlock barrier_mutex
    // need to wait CPU1 comes online
    // when CPU1 going online will block on cpus_write_lock

The above scenario will not only trigger the WARN_ON_ONCE(), but can
also trigger a deadlock. This commit therefore checks the rdp->cblist
length before invoking kthread_parkme(), and kthread_parkme() is not
invoked until the length reaches zero.

Signed-off-by: Zqiang <qiang.zhang1211@gmail.com>
---
 kernel/rcu/tree_nocb.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

Comments

Frederic Weisbecker Oct. 20, 2024, 11:14 p.m. UTC | #1
Le Sun, Oct 20, 2024 at 08:51:19PM +0800, Zqiang a écrit :
> Currently, running rcutorture test with torture_type=rcu fwd_progress=8
> n_barrier_cbs=8 nocbs_nthreads=8 nocbs_toggle=100 onoff_interval=60
> test_boost=2, will trigger the following warning:
> 
> WARNING: CPU: 19 PID: 100 at kernel/rcu/tree_nocb.h:1061 rcu_nocb_rdp_deoffload+0x292/0x2a0
> RIP: 0010:rcu_nocb_rdp_deoffload+0x292/0x2a0
> [18839.537322] Call Trace:
> [18839.538006]  <TASK>
> [18839.538596]  ? __warn+0x7e/0x120
> [18839.539491]  ? rcu_nocb_rdp_deoffload+0x292/0x2a0
> [18839.540757]  ? report_bug+0x18e/0x1a0
> [18839.541805]  ? handle_bug+0x3d/0x70
> [18839.542837]  ? exc_invalid_op+0x18/0x70
> [18839.543959]  ? asm_exc_invalid_op+0x1a/0x20
> [18839.545165]  ? rcu_nocb_rdp_deoffload+0x292/0x2a0
> [18839.546547]  rcu_nocb_cpu_deoffload+0x70/0xa0
> [18839.547814]  rcu_nocb_toggle+0x136/0x1c0
> [18839.548960]  ? __pfx_rcu_nocb_toggle+0x10/0x10
> [18839.550073]  kthread+0xd1/0x100
> [18839.550958]  ? __pfx_kthread+0x10/0x10
> [18839.552008]  ret_from_fork+0x2f/0x50
> [18839.553002]  ? __pfx_kthread+0x10/0x10
> [18839.553968]  ret_from_fork_asm+0x1a/0x30
> [18839.555038]  </TASK>
> 
> CPU0                               CPU2                          CPU3
> //rcu_nocb_toggle             //nocb_cb_wait                   //rcutorture
> 
> // deoffload CPU1             // process CPU1's rdp
> rcu_barrier()
>     rcu_segcblist_entrain()
>         rcu_segcblist_add_len(1);
>         // len == 2
>         // enqueue barrier
>         // callback to CPU1's
>         // rdp->cblist
>                              rcu_do_batch()
>                                  // invoke CPU1's rdp->cblist
>                                  // callback
>                                  rcu_barrier_callback()
>                                                              rcu_barrier()
>                                                                mutex_lock(&rcu_state.barrier_mutex);
>                                                                // still see len == 2
>                                                                // enqueue barrier callback
>                                                                // to CPU1's rdp->cblist
>                                                                rcu_segcblist_entrain()
>                                                                    rcu_segcblist_add_len(1);
>                                                                    // len == 3
>                                  // decrement len
>                                  rcu_segcblist_add_len(-2);
>                              kthread_parkme()
> 
> // CPU1's rdp->cblist len == 1
> // Warn because there is
> // still a pending barrier
> // trigger warning
> WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
> cpus_read_unlock();
> 
>                                                                 // wait CPU1 comes online
>                                                                 // invoke barrier callback on
>                                                                 // CPU1 rdp's->cblist
>                                                                 wait_for_completion(&rcu_state.barrier_completion);
> // deoffload CPU4
> cpus_read_lock()
>   rcu_barrier()
>     mutex_lock(&rcu_state.barrier_mutex);
>     // block on barrier_mutex
>     // wait rcu_barrier() on
>     // CPU3 to unlock barrier_mutex
>     // but CPU3 unlock barrier_mutex
>     // need to wait CPU1 comes online
>     // when CPU1 going online will block on cpus_write_lock
> 
> The above scenario will not only trigger WARN_ON_ONCE(), but also
> trigger deadlock, this commit therefore check rdp->cblist length
> before invoke kthread_parkme(), and the kthread_parkme() is not
> invoke until length reaches zero.
> 
> Signed-off-by: Zqiang <qiang.zhang1211@gmail.com>
> ---
>  kernel/rcu/tree_nocb.h | 8 +++++++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> index 8648233e1717..a2b0ebdefee3 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -893,6 +893,12 @@ static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
>  	return !READ_ONCE(rdp->nocb_cb_sleep) || kthread_should_park();
>  }
>  
> +static inline bool nocb_cblist_empty(struct rcu_data *rdp)
> +{
> +	return !(rcu_rdp_is_offloaded(rdp) &&

But the rdp has to be offloaded when nocb_cb_wait() is running, and that
include the times when it is parking and when it is unparking.

> +		WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist)));

And like your scenario above shows, it's possible to reach here with
callbacks. So this check shouldn't be a warning at that point?

> +}
> +
>  /*
>   * Invoke any ready callbacks from the corresponding no-CBs CPU,
>   * then, if there are no more, wait for more to appear.
> @@ -907,7 +913,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)
>  
>  	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
>  					    nocb_cb_wait_cond(rdp));
> -	if (kthread_should_park()) {
> +	if (kthread_should_park() && nocb_cblist_empty(rdp)) {

What about this instead? If the second barrier is queued before
the final check to rcu_segcblist_ready_cbs() in nocb_cb_wait(), this
will be noticed and ->nocb_cb_sleep will remain false. If otherwise rcu_barrier()
is called after that final rcu_segcblist_ready_cbs() check, it will observe
the final decrement to zero and won't entrain the callback.

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 16865475120b..0de07d44646c 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -891,7 +891,19 @@ static void nocb_cb_wait(struct rcu_data *rdp)
 	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
 					    nocb_cb_wait_cond(rdp));
 	if (kthread_should_park()) {
-		kthread_parkme();
+		/*
+		 * kthread_park() must be preceded by an rcu_barrier().
+		 * But yet another rcu_barrier() might have sneaked in between
+		 * the barrier callback execution and the callbacks counter
+		 * decrement.
+		 */
+		if (rdp->nocb_cb_sleep) {
+			rcu_nocb_lock_irqsave(rdp, flags);
+			WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
+			rcu_nocb_unlock_irqrestore(rdp, flags);
+
+			kthread_parkme();
+		}
 	} else if (READ_ONCE(rdp->nocb_cb_sleep)) {
 		WARN_ON(signal_pending(current));
 		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
Zqiang Oct. 21, 2024, 12:39 a.m. UTC | #2
>
> Le Sun, Oct 20, 2024 at 08:51:19PM +0800, Zqiang a écrit :
> > Currently, running rcutorture test with torture_type=rcu fwd_progress=8
> > n_barrier_cbs=8 nocbs_nthreads=8 nocbs_toggle=100 onoff_interval=60
> > test_boost=2, will trigger the following warning:
> >
> > WARNING: CPU: 19 PID: 100 at kernel/rcu/tree_nocb.h:1061 rcu_nocb_rdp_deoffload+0x292/0x2a0
> > RIP: 0010:rcu_nocb_rdp_deoffload+0x292/0x2a0
> > [18839.537322] Call Trace:
> > [18839.538006]  <TASK>
> > [18839.538596]  ? __warn+0x7e/0x120
> > [18839.539491]  ? rcu_nocb_rdp_deoffload+0x292/0x2a0
> > [18839.540757]  ? report_bug+0x18e/0x1a0
> > [18839.541805]  ? handle_bug+0x3d/0x70
> > [18839.542837]  ? exc_invalid_op+0x18/0x70
> > [18839.543959]  ? asm_exc_invalid_op+0x1a/0x20
> > [18839.545165]  ? rcu_nocb_rdp_deoffload+0x292/0x2a0
> > [18839.546547]  rcu_nocb_cpu_deoffload+0x70/0xa0
> > [18839.547814]  rcu_nocb_toggle+0x136/0x1c0
> > [18839.548960]  ? __pfx_rcu_nocb_toggle+0x10/0x10
> > [18839.550073]  kthread+0xd1/0x100
> > [18839.550958]  ? __pfx_kthread+0x10/0x10
> > [18839.552008]  ret_from_fork+0x2f/0x50
> > [18839.553002]  ? __pfx_kthread+0x10/0x10
> > [18839.553968]  ret_from_fork_asm+0x1a/0x30
> > [18839.555038]  </TASK>
> >
> > CPU0                               CPU2                          CPU3
> > //rcu_nocb_toggle             //nocb_cb_wait                   //rcutorture
> >
> > // deoffload CPU1             // process CPU1's rdp
> > rcu_barrier()
> >     rcu_segcblist_entrain()
> >         rcu_segcblist_add_len(1);
> >         // len == 2
> >         // enqueue barrier
> >         // callback to CPU1's
> >         // rdp->cblist
> >                              rcu_do_batch()
> >                                  // invoke CPU1's rdp->cblist
> >                                  // callback
> >                                  rcu_barrier_callback()
> >                                                              rcu_barrier()
> >                                                                mutex_lock(&rcu_state.barrier_mutex);
> >                                                                // still see len == 2
> >                                                                // enqueue barrier callback
> >                                                                // to CPU1's rdp->cblist
> >                                                                rcu_segcblist_entrain()
> >                                                                    rcu_segcblist_add_len(1);
> >                                                                    // len == 3
> >                                  // decrement len
> >                                  rcu_segcblist_add_len(-2);
> >                              kthread_parkme()
> >
> > // CPU1's rdp->cblist len == 1
> > // Warn because there is
> > // still a pending barrier
> > // trigger warning
> > WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
> > cpus_read_unlock();
> >
> >                                                                 // wait CPU1 comes online
> >                                                                 // invoke barrier callback on
> >                                                                 // CPU1 rdp's->cblist
> >                                                                 wait_for_completion(&rcu_state.barrier_completion);
> > // deoffload CPU4
> > cpus_read_lock()
> >   rcu_barrier()
> >     mutex_lock(&rcu_state.barrier_mutex);
> >     // block on barrier_mutex
> >     // wait rcu_barrier() on
> >     // CPU3 to unlock barrier_mutex
> >     // but CPU3 unlock barrier_mutex
> >     // need to wait CPU1 comes online
> >     // when CPU1 going online will block on cpus_write_lock
> >
> > The above scenario will not only trigger WARN_ON_ONCE(), but also
> > trigger deadlock, this commit therefore check rdp->cblist length
> > before invoke kthread_parkme(), and the kthread_parkme() is not
> > invoke until length reaches zero.
> >
> > Signed-off-by: Zqiang <qiang.zhang1211@gmail.com>
> > ---
> >  kernel/rcu/tree_nocb.h | 8 +++++++-
> >  1 file changed, 7 insertions(+), 1 deletion(-)
> >
> > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> > index 8648233e1717..a2b0ebdefee3 100644
> > --- a/kernel/rcu/tree_nocb.h
> > +++ b/kernel/rcu/tree_nocb.h
> > @@ -893,6 +893,12 @@ static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
> >       return !READ_ONCE(rdp->nocb_cb_sleep) || kthread_should_park();
> >  }
> >
> > +static inline bool nocb_cblist_empty(struct rcu_data *rdp)
> > +{
> > +     return !(rcu_rdp_is_offloaded(rdp) &&
>
> But the rdp has to be offloaded when nocb_cb_wait() is running, and that
> include the times when it is parking and when it is unparking.
>
> > +             WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist)));
>
> And like your scenario above shows, it's possible to reach here with
> callbacks. So this check shouldn't be a warning at that point?

Yes, the WARN_ON_ONCE() should be removed.

>
> > +}
> > +
> >  /*
> >   * Invoke any ready callbacks from the corresponding no-CBs CPU,
> >   * then, if there are no more, wait for more to appear.
> > @@ -907,7 +913,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)
> >
> >       swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
> >                                           nocb_cb_wait_cond(rdp));
> > -     if (kthread_should_park()) {
> > +     if (kthread_should_park() && nocb_cblist_empty(rdp)) {
>
> What about this instead? If the second barrier is queued before
> the final check to rcu_segcblist_ready_cbs() in nocb_cb_wait(), this
> will be noticed and ->nocb_cb_sleep will remain false. If otherwise rcu_barrier()
> is called after that final rcu_segcblist_ready_cbs() check, it will observe
> the final decrement to zero and won't entrain the callback.
>
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> index 16865475120b..0de07d44646c 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -891,7 +891,19 @@ static void nocb_cb_wait(struct rcu_data *rdp)
>         swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
>                                             nocb_cb_wait_cond(rdp));
>         if (kthread_should_park()) {
> -               kthread_parkme();
> +               /*
> +                * kthread_park() must be preceded by an rcu_barrier().
> +                * But yet another rcu_barrier() might have sneaked in between
> +                * the barrier callback execution and the callbacks counter
> +                * decrement.
> +                */
> +               if (rdp->nocb_cb_sleep) {

For the non-nocb cpus set during boot, the corresponding
rcuop kthread, we should park directly, otherwise
WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)) will be triggered.

Should the conditions be like this?
if(!rcu_rdp_is_offloaded(rdp) || rdp->nocb_cb_sleep)


Thanks
Zqiang

> +                       rcu_nocb_lock_irqsave(rdp, flags);
> +                       WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
> +                       rcu_nocb_unlock_irqrestore(rdp, flags);
> +
> +                       kthread_parkme();
> +               }
>         } else if (READ_ONCE(rdp->nocb_cb_sleep)) {
>                 WARN_ON(signal_pending(current));
>                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
Zqiang Oct. 21, 2024, 11:01 a.m. UTC | #3
>
> >
> > Le Sun, Oct 20, 2024 at 08:51:19PM +0800, Zqiang a écrit :
> > > Currently, running rcutorture test with torture_type=rcu fwd_progress=8
> > > n_barrier_cbs=8 nocbs_nthreads=8 nocbs_toggle=100 onoff_interval=60
> > > test_boost=2, will trigger the following warning:
> > >
> > > WARNING: CPU: 19 PID: 100 at kernel/rcu/tree_nocb.h:1061 rcu_nocb_rdp_deoffload+0x292/0x2a0
> > > RIP: 0010:rcu_nocb_rdp_deoffload+0x292/0x2a0
> > > [18839.537322] Call Trace:
> > > [18839.538006]  <TASK>
> > > [18839.538596]  ? __warn+0x7e/0x120
> > > [18839.539491]  ? rcu_nocb_rdp_deoffload+0x292/0x2a0
> > > [18839.540757]  ? report_bug+0x18e/0x1a0
> > > [18839.541805]  ? handle_bug+0x3d/0x70
> > > [18839.542837]  ? exc_invalid_op+0x18/0x70
> > > [18839.543959]  ? asm_exc_invalid_op+0x1a/0x20
> > > [18839.545165]  ? rcu_nocb_rdp_deoffload+0x292/0x2a0
> > > [18839.546547]  rcu_nocb_cpu_deoffload+0x70/0xa0
> > > [18839.547814]  rcu_nocb_toggle+0x136/0x1c0
> > > [18839.548960]  ? __pfx_rcu_nocb_toggle+0x10/0x10
> > > [18839.550073]  kthread+0xd1/0x100
> > > [18839.550958]  ? __pfx_kthread+0x10/0x10
> > > [18839.552008]  ret_from_fork+0x2f/0x50
> > > [18839.553002]  ? __pfx_kthread+0x10/0x10
> > > [18839.553968]  ret_from_fork_asm+0x1a/0x30
> > > [18839.555038]  </TASK>
> > >
> > > CPU0                               CPU2                          CPU3
> > > //rcu_nocb_toggle             //nocb_cb_wait                   //rcutorture
> > >
> > > // deoffload CPU1             // process CPU1's rdp
> > > rcu_barrier()
> > >     rcu_segcblist_entrain()
> > >         rcu_segcblist_add_len(1);
> > >         // len == 2
> > >         // enqueue barrier
> > >         // callback to CPU1's
> > >         // rdp->cblist
> > >                              rcu_do_batch()
> > >                                  // invoke CPU1's rdp->cblist
> > >                                  // callback
> > >                                  rcu_barrier_callback()
> > >                                                              rcu_barrier()
> > >                                                                mutex_lock(&rcu_state.barrier_mutex);
> > >                                                                // still see len == 2
> > >                                                                // enqueue barrier callback
> > >                                                                // to CPU1's rdp->cblist
> > >                                                                rcu_segcblist_entrain()
> > >                                                                    rcu_segcblist_add_len(1);
> > >                                                                    // len == 3
> > >                                  // decrement len
> > >                                  rcu_segcblist_add_len(-2);
> > >                              kthread_parkme()
> > >
> > > // CPU1's rdp->cblist len == 1
> > > // Warn because there is
> > > // still a pending barrier
> > > // trigger warning
> > > WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
> > > cpus_read_unlock();
> > >
> > >                                                                 // wait CPU1 comes online
> > >                                                                 // invoke barrier callback on
> > >                                                                 // CPU1 rdp's->cblist
> > >                                                                 wait_for_completion(&rcu_state.barrier_completion);
> > > // deoffload CPU4
> > > cpus_read_lock()
> > >   rcu_barrier()
> > >     mutex_lock(&rcu_state.barrier_mutex);
> > >     // block on barrier_mutex
> > >     // wait rcu_barrier() on
> > >     // CPU3 to unlock barrier_mutex
> > >     // but CPU3 unlock barrier_mutex
> > >     // need to wait CPU1 comes online
> > >     // when CPU1 going online will block on cpus_write_lock
> > >
> > > The above scenario will not only trigger WARN_ON_ONCE(), but also
> > > trigger deadlock, this commit therefore check rdp->cblist length
> > > before invoke kthread_parkme(), and the kthread_parkme() is not
> > > invoke until length reaches zero.
> > >
> > > Signed-off-by: Zqiang <qiang.zhang1211@gmail.com>
> > > ---
> > >  kernel/rcu/tree_nocb.h | 8 +++++++-
> > >  1 file changed, 7 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> > > index 8648233e1717..a2b0ebdefee3 100644
> > > --- a/kernel/rcu/tree_nocb.h
> > > +++ b/kernel/rcu/tree_nocb.h
> > > @@ -893,6 +893,12 @@ static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
> > >       return !READ_ONCE(rdp->nocb_cb_sleep) || kthread_should_park();
> > >  }
> > >
> > > +static inline bool nocb_cblist_empty(struct rcu_data *rdp)
> > > +{
> > > +     return !(rcu_rdp_is_offloaded(rdp) &&
> >
> > But the rdp has to be offloaded when nocb_cb_wait() is running, and that
> > include the times when it is parking and when it is unparking.
> >
> > > +             WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist)));
> >
> > And like your scenario above shows, it's possible to reach here with
> > callbacks. So this check shouldn't be a warning at that point?
>
> Yes, the WARN_ON_ONCE() should be removed.
>
> >
> > > +}
> > > +
> > >  /*
> > >   * Invoke any ready callbacks from the corresponding no-CBs CPU,
> > >   * then, if there are no more, wait for more to appear.
> > > @@ -907,7 +913,7 @@ static void nocb_cb_wait(struct rcu_data *rdp)
> > >
> > >       swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
> > >                                           nocb_cb_wait_cond(rdp));
> > > -     if (kthread_should_park()) {
> > > +     if (kthread_should_park() && nocb_cblist_empty(rdp)) {
> >
> > What about this instead? If the second barrier is queued before
> > the final check to rcu_segcblist_ready_cbs() in nocb_cb_wait(), this
> > will be noticed and ->nocb_cb_sleep will remain false. If otherwise rcu_barrier()
> > is called after that final rcu_segcblist_ready_cbs() check, it will observe
> > the final decrement to zero and won't entrain the callback.
> >
> > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> > index 16865475120b..0de07d44646c 100644
> > --- a/kernel/rcu/tree_nocb.h
> > +++ b/kernel/rcu/tree_nocb.h
> > @@ -891,7 +891,19 @@ static void nocb_cb_wait(struct rcu_data *rdp)
> >         swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
> >                                             nocb_cb_wait_cond(rdp));
> >         if (kthread_should_park()) {
> > -               kthread_parkme();
> > +               /*
> > +                * kthread_park() must be preceded by an rcu_barrier().
> > +                * But yet another rcu_barrier() might have sneaked in between
> > +                * the barrier callback execution and the callbacks counter
> > +                * decrement.
> > +                */
> > +               if (rdp->nocb_cb_sleep) {
>
> For the non-nocb cpus set during boot, the corresponding
> rcuop kthread, we should park directly, otherwise
> WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)) will be triggered.
>
> Should the conditions be like this?
> if(!rcu_rdp_is_offloaded(rdp) || rdp->nocb_cb_sleep)
>
>

How about this?

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 8648233e1717..14b70e662c9e 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -904,11 +904,27 @@ static void nocb_cb_wait(struct rcu_data *rdp)
        unsigned long flags;
        bool needwake_gp = false;
        struct rcu_node *rnp = rdp->mynode;
+       bool need_parkme = false;

        swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
                                            nocb_cb_wait_cond(rdp));
        if (kthread_should_park()) {
-               kthread_parkme();
+               /*
+                * kthread_park() must be preceded by an rcu_barrier().
+                * But yet another rcu_barrier() might have sneaked in between
+                * the barrier callback execution and the callbacks counter
+                * decrement.
+                */
+               if (!rcu_rdp_is_offloaded(rdp)) {
+                       need_parkme = true;
+               } else if (rdp->nocb_cb_sleep) {
+                       need_parkme = true;
+                       rcu_nocb_lock_irqsave(rdp, flags);
+                       WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
+                       rcu_nocb_unlock_irqrestore(rdp, flags);
+               }
+               if (need_parkme)
+                       kthread_parkme();
        } else if (READ_ONCE(rdp->nocb_cb_sleep)) {
                WARN_ON(signal_pending(current));
                trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));


> Thanks
> Zqiang
>
> > +                       rcu_nocb_lock_irqsave(rdp, flags);
> > +                       WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
> > +                       rcu_nocb_unlock_irqrestore(rdp, flags);
> > +
> > +                       kthread_parkme();
> > +               }
> >         } else if (READ_ONCE(rdp->nocb_cb_sleep)) {
> >                 WARN_ON(signal_pending(current));
> >                 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
Frederic Weisbecker Oct. 21, 2024, 11:44 a.m. UTC | #4
Le Mon, Oct 21, 2024 at 07:01:02PM +0800, Z qiang a écrit :
> > For the non-nocb cpus set during boot, the corresponding
> > rcuop kthread, we should park directly, otherwise
> > WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)) will be triggered.

Ah but this case is different. kthread_park() is called on
the kthread that is freshly created. In that case it is
parked before the kthread even had a chance to call its handler
(which is rcu_nocb_cb_kthread()).

See these lines in kthread():

	/* OK, tell user we're spawned, wait for stop or wakeup */
	__set_current_state(TASK_UNINTERRUPTIBLE);
	create->result = current;
	/*
	 * Thread is going to call schedule(), do not preempt it,
	 * or the creator may spend more time in wait_task_inactive().
	 */
	preempt_disable();
	complete(done);
	schedule_preempt_disabled();
	preempt_enable();

	ret = -EINTR;
	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
		cgroup_kthread_ready();
		__kthread_parkme(self);
		ret = threadfn(data);
	}



So really rcu_rdp_is_offloaded() has to be true (but we can
warn if it's not. Though we already have a test for this in
nocb_cb_wait()).

Thanks.
Zqiang Oct. 21, 2024, 12:33 p.m. UTC | #5
>
> Le Mon, Oct 21, 2024 at 07:01:02PM +0800, Z qiang a écrit :
> > > For the non-nocb cpus set during boot, the corresponding
> > > rcuop kthread, we should park directly, otherwise
> > > WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)) will be triggered.
>
> Ah but this case is different. kthread_park() is called on
> the kthread that is freshly created. In that case it is
> parked before the kthread even had a chance to call its handler
> (which is rcu_nocb_cb_kthread()).
>
> See these lines in kthread():
>
>         /* OK, tell user we're spawned, wait for stop or wakeup */
>         __set_current_state(TASK_UNINTERRUPTIBLE);
>         create->result = current;
>         /*
>          * Thread is going to call schedule(), do not preempt it,
>          * or the creator may spend more time in wait_task_inactive().
>          */
>         preempt_disable();
>         complete(done);
>         schedule_preempt_disabled();
>         preempt_enable();
>
>         ret = -EINTR;
>         if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
>                 cgroup_kthread_ready();
>                 __kthread_parkme(self);
>                 ret = threadfn(data);
>         }
>

Ah, Thanks!
get it, I ignored parkme in kthread(),
will update and resend.

Thanks
Zqiang


>
>
> So really rcu_rdp_is_offloaded() has to be true (but we can
> warn if it's not. Though we already have a test for this in
> nocb_cb_wait()).
>
> Thanks.
diff mbox series

Patch

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 8648233e1717..a2b0ebdefee3 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -893,6 +893,12 @@  static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
 	return !READ_ONCE(rdp->nocb_cb_sleep) || kthread_should_park();
 }
 
+static inline bool nocb_cblist_empty(struct rcu_data *rdp)
+{
+	return !(rcu_rdp_is_offloaded(rdp) &&
+		WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist)));
+}
+
 /*
  * Invoke any ready callbacks from the corresponding no-CBs CPU,
  * then, if there are no more, wait for more to appear.
@@ -907,7 +913,7 @@  static void nocb_cb_wait(struct rcu_data *rdp)
 
 	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
 					    nocb_cb_wait_cond(rdp));
-	if (kthread_should_park()) {
+	if (kthread_should_park() && nocb_cblist_empty(rdp)) {
 		kthread_parkme();
 	} else if (READ_ONCE(rdp->nocb_cb_sleep)) {
 		WARN_ON(signal_pending(current));