diff mbox series

[v2] rcu/nocb: Avoid polling when myrdp->nocb_head_rdp list is empty

Message ID 20220607075057.909070-1-qiang1.zhang@intel.com (mailing list archive)
State Superseded
Headers show
Series [v2] rcu/nocb: Avoid polling when myrdp->nocb_head_rdp list is empty | expand

Commit Message

Zqiang June 7, 2022, 7:50 a.m. UTC
Currently, If the 'rcu_nocb_poll' bootargs is enable, all rcuog kthreads
enter polling mode. however, due to only insert CPU's rdp which belong to
rcu_nocb_mask to 'nocb_head_rdp' list or all CPU's rdp served by rcuog
kthread have been de-offloaded, these cause the 'nocb_head_rdp' list
served by rcuog kthread is empty, when the 'nocb_head_rdp' is empty,
the rcuog kthread in polling mode not actually do anything. fix it by
exiting polling mode when the 'nocb_head_rdp'list is empty, otherwise
entering polling mode.

Signed-off-by: Zqiang <qiang1.zhang@intel.com>
---
 v1->v2:
 Move rcu_nocb_poll flags check from rdp_offload_toggle() to
 rcu_nocb_rdp_offload/deoffload(), avoid unnecessary setting of
 rdp_gp->nocb_gp_sleep flags, because when rcu_nocb_poll is set
 the rdp_gp->nocb_gp_sleep is not used.

 kernel/rcu/tree_nocb.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

Comments

Paul E. McKenney June 7, 2022, 4:16 p.m. UTC | #1
On Tue, Jun 07, 2022 at 03:50:57PM +0800, Zqiang wrote:
> Currently, If the 'rcu_nocb_poll' bootargs is enable, all rcuog kthreads
> enter polling mode. however, due to only insert CPU's rdp which belong to
> rcu_nocb_mask to 'nocb_head_rdp' list or all CPU's rdp served by rcuog
> kthread have been de-offloaded, these cause the 'nocb_head_rdp' list
> served by rcuog kthread is empty, when the 'nocb_head_rdp' is empty,
> the rcuog kthread in polling mode not actually do anything. fix it by
> exiting polling mode when the 'nocb_head_rdp'list is empty, otherwise
> entering polling mode.
> 
> Signed-off-by: Zqiang <qiang1.zhang@intel.com>

This looks a bit more plausible, but what have you done to test this?

							Thanx, Paul

> ---
>  v1->v2:
>  Move rcu_nocb_poll flags check from rdp_offload_toggle() to
>  rcu_nocb_rdp_offload/deoffload(), avoid unnecessary setting of
>  rdp_gp->nocb_gp_sleep flags, because when rcu_nocb_poll is set
>  the rdp_gp->nocb_gp_sleep is not used.
> 
>  kernel/rcu/tree_nocb.h | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> index fa8e4f82e60c..2a52c9abc681 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -698,10 +698,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
>  				   TPS("WakeBypassIsDeferred"));
>  	}
>  	if (rcu_nocb_poll) {
> -		/* Polling, so trace if first poll in the series. */
> -		if (gotcbs)
> -			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> -		schedule_timeout_idle(1);
> +		if (list_empty(&my_rdp->nocb_head_rdp)) {
> +			rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> +		} else {
> +			/* Polling, so trace if first poll in the series. */
> +			if (gotcbs)
> +				trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> +			schedule_timeout_idle(1);
> +		}
>  	} else if (!needwait_gp) {
>  		/* Wait for callbacks to appear. */
>  		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
> @@ -1030,7 +1034,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
>  
>  	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
>  	if (rdp_gp->nocb_gp_kthread) {
> -		if (wake_gp)
> +		if (wake_gp || rcu_nocb_poll)
>  			wake_up_process(rdp_gp->nocb_gp_kthread);
>  
>  		/*
> @@ -1152,7 +1156,7 @@ static long rcu_nocb_rdp_offload(void *arg)
>  	 *      rcu_nocb_unlock()         rcu_nocb_unlock()
>  	 */
>  	wake_gp = rdp_offload_toggle(rdp, true, flags);
> -	if (wake_gp)
> +	if (wake_gp || rcu_nocb_poll)
>  		wake_up_process(rdp_gp->nocb_gp_kthread);
>  	swait_event_exclusive(rdp->nocb_state_wq,
>  			      rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
> -- 
> 2.25.1
>
Zqiang June 8, 2022, 12:41 a.m. UTC | #2
On Tue, Jun 07, 2022 at 03:50:57PM +0800, Zqiang wrote:
> Currently, If the 'rcu_nocb_poll' bootargs is enable, all rcuog 
> kthreads enter polling mode. however, due to only insert CPU's rdp 
> which belong to rcu_nocb_mask to 'nocb_head_rdp' list or all CPU's rdp 
> served by rcuog kthread have been de-offloaded, these cause the 
> 'nocb_head_rdp' list served by rcuog kthread is empty, when the 
> 'nocb_head_rdp' is empty, the rcuog kthread in polling mode not 
> actually do anything. fix it by exiting polling mode when the 
> 'nocb_head_rdp'list is empty, otherwise entering polling mode.
> 
> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
>
>This looks a bit more plausible, but what have you done to test this?

Yes , I have only tested as follows and I added two trace events.

+                      trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EnterNoPoll"));
                        rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
+                       trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("ExitNoPoll"));

runqemu kvm slirp nographic qemuparams="-m 2048 -smp 4" bootparams="rcu_nocbs=2,3 rcutree.dump_tree=1 rcu_nocb_poll rcutorture.nocbs_nthreads=4 rcutorture.test_boost=0" -d

Thanks
Zqiang

>
>							Thanx, Paul
>
> ---
>  v1->v2:
>  Move rcu_nocb_poll flags check from rdp_offload_toggle() to  
> rcu_nocb_rdp_offload/deoffload(), avoid unnecessary setting of  
> rdp_gp->nocb_gp_sleep flags, because when rcu_nocb_poll is set  the 
> rdp_gp->nocb_gp_sleep is not used.
> 
>  kernel/rcu/tree_nocb.h | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 
> fa8e4f82e60c..2a52c9abc681 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -698,10 +698,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
>  				   TPS("WakeBypassIsDeferred"));
>  	}
>  	if (rcu_nocb_poll) {
> -		/* Polling, so trace if first poll in the series. */
> -		if (gotcbs)
> -			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> -		schedule_timeout_idle(1);
> +		if (list_empty(&my_rdp->nocb_head_rdp)) {
> +			rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> +		} else {
> +			/* Polling, so trace if first poll in the series. */
> +			if (gotcbs)
> +				trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> +			schedule_timeout_idle(1);
> +		}
>  	} else if (!needwait_gp) {
>  		/* Wait for callbacks to appear. */
>  		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep")); @@ -1030,7 
> +1034,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
>  
>  	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
>  	if (rdp_gp->nocb_gp_kthread) {
> -		if (wake_gp)
> +		if (wake_gp || rcu_nocb_poll)
>  			wake_up_process(rdp_gp->nocb_gp_kthread);
>  
>  		/*
> @@ -1152,7 +1156,7 @@ static long rcu_nocb_rdp_offload(void *arg)
>  	 *      rcu_nocb_unlock()         rcu_nocb_unlock()
>  	 */
>  	wake_gp = rdp_offload_toggle(rdp, true, flags);
> -	if (wake_gp)
> +	if (wake_gp || rcu_nocb_poll)
>  		wake_up_process(rdp_gp->nocb_gp_kthread);
>  	swait_event_exclusive(rdp->nocb_state_wq,
>  			      rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
> --
> 2.25.1
>
Paul E. McKenney June 8, 2022, 12:55 a.m. UTC | #3
On Wed, Jun 08, 2022 at 12:41:28AM +0000, Zhang, Qiang1 wrote:
> On Tue, Jun 07, 2022 at 03:50:57PM +0800, Zqiang wrote:
> > Currently, If the 'rcu_nocb_poll' bootargs is enable, all rcuog 
> > kthreads enter polling mode. however, due to only insert CPU's rdp 
> > which belong to rcu_nocb_mask to 'nocb_head_rdp' list or all CPU's rdp 
> > served by rcuog kthread have been de-offloaded, these cause the 
> > 'nocb_head_rdp' list served by rcuog kthread is empty, when the 
> > 'nocb_head_rdp' is empty, the rcuog kthread in polling mode not 
> > actually do anything. fix it by exiting polling mode when the 
> > 'nocb_head_rdp'list is empty, otherwise entering polling mode.
> > 
> > Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> >
> >This looks a bit more plausible, but what have you done to test this?
> 
> Yes , I have only tested as follows and I added two trace events.
> 
> +                      trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EnterNoPoll"));
>                         rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> +                       trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("ExitNoPoll"));
> 
> runqemu kvm slirp nographic qemuparams="-m 2048 -smp 4" bootparams="rcu_nocbs=2,3 rcutree.dump_tree=1 rcu_nocb_poll rcutorture.nocbs_nthreads=4 rcutorture.test_boost=0" -d

To exercise your change, could you please also add an appropriate value
for the rcutorture.nocbs_nthreads kernel boot parameter?  Without that
boot parameter, your kernel will not actually offload or deoffload
any CPUs.

An alternative approach is to run rcutorture scenario TREE01.

							Thanx, Paul

> Thanks
> Zqiang
> 
> >
> >							Thanx, Paul
> >
> > ---
> >  v1->v2:
> >  Move rcu_nocb_poll flags check from rdp_offload_toggle() to  
> > rcu_nocb_rdp_offload/deoffload(), avoid unnecessary setting of  
> > rdp_gp->nocb_gp_sleep flags, because when rcu_nocb_poll is set  the 
> > rdp_gp->nocb_gp_sleep is not used.
> > 
> >  kernel/rcu/tree_nocb.h | 16 ++++++++++------
> >  1 file changed, 10 insertions(+), 6 deletions(-)
> > 
> > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 
> > fa8e4f82e60c..2a52c9abc681 100644
> > --- a/kernel/rcu/tree_nocb.h
> > +++ b/kernel/rcu/tree_nocb.h
> > @@ -698,10 +698,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
> >  				   TPS("WakeBypassIsDeferred"));
> >  	}
> >  	if (rcu_nocb_poll) {
> > -		/* Polling, so trace if first poll in the series. */
> > -		if (gotcbs)
> > -			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> > -		schedule_timeout_idle(1);
> > +		if (list_empty(&my_rdp->nocb_head_rdp)) {
> > +			rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> > +		} else {
> > +			/* Polling, so trace if first poll in the series. */
> > +			if (gotcbs)
> > +				trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> > +			schedule_timeout_idle(1);
> > +		}
> >  	} else if (!needwait_gp) {
> >  		/* Wait for callbacks to appear. */
> >  		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep")); @@ -1030,7 
> > +1034,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
> >  
> >  	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
> >  	if (rdp_gp->nocb_gp_kthread) {
> > -		if (wake_gp)
> > +		if (wake_gp || rcu_nocb_poll)
> >  			wake_up_process(rdp_gp->nocb_gp_kthread);
> >  
> >  		/*
> > @@ -1152,7 +1156,7 @@ static long rcu_nocb_rdp_offload(void *arg)
> >  	 *      rcu_nocb_unlock()         rcu_nocb_unlock()
> >  	 */
> >  	wake_gp = rdp_offload_toggle(rdp, true, flags);
> > -	if (wake_gp)
> > +	if (wake_gp || rcu_nocb_poll)
> >  		wake_up_process(rdp_gp->nocb_gp_kthread);
> >  	swait_event_exclusive(rdp->nocb_state_wq,
> >  			      rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
> > --
> > 2.25.1
> >
Zqiang June 8, 2022, 1:01 a.m. UTC | #4
On Wed, Jun 08, 2022 at 12:41:28AM +0000, Zhang, Qiang1 wrote:
> On Tue, Jun 07, 2022 at 03:50:57PM +0800, Zqiang wrote:
> > Currently, If the 'rcu_nocb_poll' bootargs is enable, all rcuog 
> > kthreads enter polling mode. however, due to only insert CPU's rdp 
> > which belong to rcu_nocb_mask to 'nocb_head_rdp' list or all CPU's 
> > rdp served by rcuog kthread have been de-offloaded, these cause the 
> > 'nocb_head_rdp' list served by rcuog kthread is empty, when the 
> > 'nocb_head_rdp' is empty, the rcuog kthread in polling mode not 
> > actually do anything. fix it by exiting polling mode when the 
> > 'nocb_head_rdp'list is empty, otherwise entering polling mode.
> > 
> > Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> >
> >This looks a bit more plausible, but what have you done to test this?
> 
> Yes , I have only tested as follows and I added two trace events.
> 
> +                      trace_rcu_nocb_wake(rcu_state.name, cpu, 
> + TPS("EnterNoPoll"));
>                         
> rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> +                       trace_rcu_nocb_wake(rcu_state.name, cpu, 
> + TPS("ExitNoPoll"));
> 
> runqemu kvm slirp nographic qemuparams="-m 2048 -smp 4" 
> bootparams="rcu_nocbs=2,3 rcutree.dump_tree=1 rcu_nocb_poll 
> rcutorture.nocbs_nthreads=4 rcutorture.test_boost=0" -d

>To exercise your change, could you please also add an appropriate value for the rcutorture.nocbs_nthreads kernel boot parameter?  Without that boot parameter, your kernel will not actually offload or deoffload any CPUs.

I have set ' rcutorture.nocbs_nthreads=4 ' in bootargs.

>
>An alternative approach is to run rcutorture scenario TREE01.

I will also use TREE01 for testing

Thanks
Zqiang

>
>							Thanx, Paul

> Thanks
> Zqiang
> 
> >
> >							Thanx, Paul
> >
> > ---
> >  v1->v2:
> >  Move rcu_nocb_poll flags check from rdp_offload_toggle() to 
> > rcu_nocb_rdp_offload/deoffload(), avoid unnecessary setting of 
> > rdp_gp->nocb_gp_sleep flags, because when rcu_nocb_poll is set  the 
> > rdp_gp->nocb_gp_sleep is not used.
> > 
> >  kernel/rcu/tree_nocb.h | 16 ++++++++++------
> >  1 file changed, 10 insertions(+), 6 deletions(-)
> > 
> > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index
> > fa8e4f82e60c..2a52c9abc681 100644
> > --- a/kernel/rcu/tree_nocb.h
> > +++ b/kernel/rcu/tree_nocb.h
> > @@ -698,10 +698,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
> >  				   TPS("WakeBypassIsDeferred"));
> >  	}
> >  	if (rcu_nocb_poll) {
> > -		/* Polling, so trace if first poll in the series. */
> > -		if (gotcbs)
> > -			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> > -		schedule_timeout_idle(1);
> > +		if (list_empty(&my_rdp->nocb_head_rdp)) {
> > +			rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> > +		} else {
> > +			/* Polling, so trace if first poll in the series. */
> > +			if (gotcbs)
> > +				trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> > +			schedule_timeout_idle(1);
> > +		}
> >  	} else if (!needwait_gp) {
> >  		/* Wait for callbacks to appear. */
> >  		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep")); @@ 
> > -1030,7
> > +1034,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
> >  
> >  	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
> >  	if (rdp_gp->nocb_gp_kthread) {
> > -		if (wake_gp)
> > +		if (wake_gp || rcu_nocb_poll)
> >  			wake_up_process(rdp_gp->nocb_gp_kthread);
> >  
> >  		/*
> > @@ -1152,7 +1156,7 @@ static long rcu_nocb_rdp_offload(void *arg)
> >  	 *      rcu_nocb_unlock()         rcu_nocb_unlock()
> >  	 */
> >  	wake_gp = rdp_offload_toggle(rdp, true, flags);
> > -	if (wake_gp)
> > +	if (wake_gp || rcu_nocb_poll)
> >  		wake_up_process(rdp_gp->nocb_gp_kthread);
> >  	swait_event_exclusive(rdp->nocb_state_wq,
> >  			      rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
> > --
> > 2.25.1
> >
Paul E. McKenney June 8, 2022, 4:06 a.m. UTC | #5
On Wed, Jun 08, 2022 at 01:01:31AM +0000, Zhang, Qiang1 wrote:
> On Wed, Jun 08, 2022 at 12:41:28AM +0000, Zhang, Qiang1 wrote:
> > On Tue, Jun 07, 2022 at 03:50:57PM +0800, Zqiang wrote:
> > > Currently, If the 'rcu_nocb_poll' bootargs is enable, all rcuog 
> > > kthreads enter polling mode. however, due to only insert CPU's rdp 
> > > which belong to rcu_nocb_mask to 'nocb_head_rdp' list or all CPU's 
> > > rdp served by rcuog kthread have been de-offloaded, these cause the 
> > > 'nocb_head_rdp' list served by rcuog kthread is empty, when the 
> > > 'nocb_head_rdp' is empty, the rcuog kthread in polling mode not 
> > > actually do anything. fix it by exiting polling mode when the 
> > > 'nocb_head_rdp'list is empty, otherwise entering polling mode.
> > > 
> > > Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> > >
> > >This looks a bit more plausible, but what have you done to test this?
> > 
> > Yes , I have only tested as follows and I added two trace events.
> > 
> > +                      trace_rcu_nocb_wake(rcu_state.name, cpu, 
> > + TPS("EnterNoPoll"));
> >                         
> > rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> > +                       trace_rcu_nocb_wake(rcu_state.name, cpu, 
> > + TPS("ExitNoPoll"));
> > 
> > runqemu kvm slirp nographic qemuparams="-m 2048 -smp 4" 
> > bootparams="rcu_nocbs=2,3 rcutree.dump_tree=1 rcu_nocb_poll 
> > rcutorture.nocbs_nthreads=4 rcutorture.test_boost=0" -d
> 
> >To exercise your change, could you please also add an appropriate value for the rcutorture.nocbs_nthreads kernel boot parameter?  Without that boot parameter, your kernel will not actually offload or deoffload any CPUs.
> 
> I have set ' rcutorture.nocbs_nthreads=4 ' in bootargs.

I see it now, apologies for my confusion!

							Thanx, Paul

> >An alternative approach is to run rcutorture scenario TREE01.
> 
> I will also use TREE01 for testing
> 
> Thanks
> Zqiang
> 
> >
> >							Thanx, Paul
> 
> > Thanks
> > Zqiang
> > 
> > >
> > >							Thanx, Paul
> > >
> > > ---
> > >  v1->v2:
> > >  Move rcu_nocb_poll flags check from rdp_offload_toggle() to 
> > > rcu_nocb_rdp_offload/deoffload(), avoid unnecessary setting of 
> > > rdp_gp->nocb_gp_sleep flags, because when rcu_nocb_poll is set  the 
> > > rdp_gp->nocb_gp_sleep is not used.
> > > 
> > >  kernel/rcu/tree_nocb.h | 16 ++++++++++------
> > >  1 file changed, 10 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index
> > > fa8e4f82e60c..2a52c9abc681 100644
> > > --- a/kernel/rcu/tree_nocb.h
> > > +++ b/kernel/rcu/tree_nocb.h
> > > @@ -698,10 +698,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
> > >  				   TPS("WakeBypassIsDeferred"));
> > >  	}
> > >  	if (rcu_nocb_poll) {
> > > -		/* Polling, so trace if first poll in the series. */
> > > -		if (gotcbs)
> > > -			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> > > -		schedule_timeout_idle(1);
> > > +		if (list_empty(&my_rdp->nocb_head_rdp)) {
> > > +			rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> > > +		} else {
> > > +			/* Polling, so trace if first poll in the series. */
> > > +			if (gotcbs)
> > > +				trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> > > +			schedule_timeout_idle(1);
> > > +		}
> > >  	} else if (!needwait_gp) {
> > >  		/* Wait for callbacks to appear. */
> > >  		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep")); @@ 
> > > -1030,7
> > > +1034,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
> > >  
> > >  	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
> > >  	if (rdp_gp->nocb_gp_kthread) {
> > > -		if (wake_gp)
> > > +		if (wake_gp || rcu_nocb_poll)
> > >  			wake_up_process(rdp_gp->nocb_gp_kthread);
> > >  
> > >  		/*
> > > @@ -1152,7 +1156,7 @@ static long rcu_nocb_rdp_offload(void *arg)
> > >  	 *      rcu_nocb_unlock()         rcu_nocb_unlock()
> > >  	 */
> > >  	wake_gp = rdp_offload_toggle(rdp, true, flags);
> > > -	if (wake_gp)
> > > +	if (wake_gp || rcu_nocb_poll)
> > >  		wake_up_process(rdp_gp->nocb_gp_kthread);
> > >  	swait_event_exclusive(rdp->nocb_state_wq,
> > >  			      rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
> > > --
> > > 2.25.1
> > >
Zqiang June 8, 2022, 6:28 a.m. UTC | #6
On Wed, Jun 08, 2022 at 01:01:31AM +0000, Zhang, Qiang1 wrote:
> On Wed, Jun 08, 2022 at 12:41:28AM +0000, Zhang, Qiang1 wrote:
> > On Tue, Jun 07, 2022 at 03:50:57PM +0800, Zqiang wrote:
> > > Currently, If the 'rcu_nocb_poll' bootargs is enable, all rcuog 
> > > kthreads enter polling mode. however, due to only insert CPU's rdp 
> > > which belong to rcu_nocb_mask to 'nocb_head_rdp' list or all CPU's 
> > > rdp served by rcuog kthread have been de-offloaded, these cause 
> > > the 'nocb_head_rdp' list served by rcuog kthread is empty, when 
> > > the 'nocb_head_rdp' is empty, the rcuog kthread in polling mode 
> > > not actually do anything. fix it by exiting polling mode when the 
> > > 'nocb_head_rdp'list is empty, otherwise entering polling mode.
> > > 
> > > Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> > >
> > >This looks a bit more plausible, but what have you done to test this?
> > 
> > Yes , I have only tested as follows and I added two trace events.
> > 
> > +                      trace_rcu_nocb_wake(rcu_state.name, cpu, 
> > + TPS("EnterNoPoll"));
> >                         
> > rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> > +                       trace_rcu_nocb_wake(rcu_state.name, cpu, 
> > + TPS("ExitNoPoll"));
> > 
> > runqemu kvm slirp nographic qemuparams="-m 2048 -smp 4" 
> > bootparams="rcu_nocbs=2,3 rcutree.dump_tree=1 rcu_nocb_poll
> > rcutorture.nocbs_nthreads=4 rcutorture.test_boost=0" -d

Hi Paul, The test trace log is as follows:

         rcuog/0-19      [003] .....   169.400494: rcu_nocb_wake: rcu_preempt 0 EnterNoPoll
         rcuog/0-19      [003] .....   172.592072: rcu_nocb_wake: rcu_preempt 0 ExitNoPoll
         rcuog/2-36      [002] .....   173.553717: rcu_nocb_wake: rcu_preempt 2 EnterNoPoll
         rcuog/2-36      [002] .....   173.750084: rcu_nocb_wake: rcu_preempt 2 ExitNoPoll

Thanks
Zqiang

> 
> >To exercise your change, could you please also add an appropriate value for the rcutorture.nocbs_nthreads kernel boot parameter?  Without that boot parameter, your kernel will not actually offload or deoffload any CPUs.
> 
> I have set ' rcutorture.nocbs_nthreads=4 ' in bootargs.
>
>I see it now, apologies for my confusion!
>
>							Thanx, Paul
>
> >An alternative approach is to run rcutorture scenario TREE01.
> 
> I will also use TREE01 for testing
> 
> Thanks
> Zqiang
> 
> >
> >							Thanx, Paul
> 
> > Thanks
> > Zqiang
> > 
> > >
> > >							Thanx, Paul
> > >
> > > ---
> > >  v1->v2:
> > >  Move rcu_nocb_poll flags check from rdp_offload_toggle() to 
> > > rcu_nocb_rdp_offload/deoffload(), avoid unnecessary setting of 
> > > rdp_gp->nocb_gp_sleep flags, because when rcu_nocb_poll is set  
> > > the rdp_gp->nocb_gp_sleep is not used.
> > > 
> > >  kernel/rcu/tree_nocb.h | 16 ++++++++++------
> > >  1 file changed, 10 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index
> > > fa8e4f82e60c..2a52c9abc681 100644
> > > --- a/kernel/rcu/tree_nocb.h
> > > +++ b/kernel/rcu/tree_nocb.h
> > > @@ -698,10 +698,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
> > >  				   TPS("WakeBypassIsDeferred"));
> > >  	}
> > >  	if (rcu_nocb_poll) {
> > > -		/* Polling, so trace if first poll in the series. */
> > > -		if (gotcbs)
> > > -			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> > > -		schedule_timeout_idle(1);
> > > +		if (list_empty(&my_rdp->nocb_head_rdp)) {
> > > +			rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
> > > +		} else {
> > > +			/* Polling, so trace if first poll in the series. */
> > > +			if (gotcbs)
> > > +				trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> > > +			schedule_timeout_idle(1);
> > > +		}
> > >  	} else if (!needwait_gp) {
> > >  		/* Wait for callbacks to appear. */
> > >  		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep")); @@
> > > -1030,7
> > > +1034,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
> > >  
> > >  	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
> > >  	if (rdp_gp->nocb_gp_kthread) {
> > > -		if (wake_gp)
> > > +		if (wake_gp || rcu_nocb_poll)
> > >  			wake_up_process(rdp_gp->nocb_gp_kthread);
> > >  
> > >  		/*
> > > @@ -1152,7 +1156,7 @@ static long rcu_nocb_rdp_offload(void *arg)
> > >  	 *      rcu_nocb_unlock()         rcu_nocb_unlock()
> > >  	 */
> > >  	wake_gp = rdp_offload_toggle(rdp, true, flags);
> > > -	if (wake_gp)
> > > +	if (wake_gp || rcu_nocb_poll)
> > >  		wake_up_process(rdp_gp->nocb_gp_kthread);
> > >  	swait_event_exclusive(rdp->nocb_state_wq,
> > >  			      rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) 
> > > &&
> > > --
> > > 2.25.1
> > >
Frederic Weisbecker June 10, 2022, 12:05 p.m. UTC | #7
On Tue, Jun 07, 2022 at 03:50:57PM +0800, Zqiang wrote:
> Currently, If the 'rcu_nocb_poll' bootargs is enable, all rcuog kthreads
> enter polling mode. however, due to only insert CPU's rdp which belong to
> rcu_nocb_mask to 'nocb_head_rdp' list or all CPU's rdp served by rcuog
> kthread have been de-offloaded, these cause the 'nocb_head_rdp' list
> served by rcuog kthread is empty, when the 'nocb_head_rdp' is empty,
> the rcuog kthread in polling mode not actually do anything. fix it by
> exiting polling mode when the 'nocb_head_rdp'list is empty, otherwise
> entering polling mode.
> 
> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> ---
>  v1->v2:
>  Move rcu_nocb_poll flags check from rdp_offload_toggle() to
>  rcu_nocb_rdp_offload/deoffload(), avoid unnecessary setting of
>  rdp_gp->nocb_gp_sleep flags, because when rcu_nocb_poll is set
>  the rdp_gp->nocb_gp_sleep is not used.
> 
>  kernel/rcu/tree_nocb.h | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
> index fa8e4f82e60c..2a52c9abc681 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -698,10 +698,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
>  				   TPS("WakeBypassIsDeferred"));
>  	}
>  	if (rcu_nocb_poll) {
> -		/* Polling, so trace if first poll in the series. */
> -		if (gotcbs)
> -			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> -		schedule_timeout_idle(1);
> +		if (list_empty(&my_rdp->nocb_head_rdp)) {
> +			rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));

I suspect you have based your patch on upstream tree which doesn't seem
to have this one yet:

   "rcu/nocb: Add/del rdp to iterate from rcuog itself"

With this patch you can't wait on changes to my_rdp->nocb_toggling_rdp because
nocb_gp_wait() now performs the list_add/list_del itself.

Please rebase your patch on top of latest rcu:dev from Paul's tree. Then
all you need to do is to change the wait side, something like this (untested):

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index fa8e4f82e60c..f36d6be4f372 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -584,6 +584,15 @@ static int nocb_gp_toggle_rdp(struct rcu_data *rdp,
 	return ret;
 }
 
+static void nocb_gp_sleep(struct rdp *my_rdp, int cpu)
+{
+	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
+	swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
+					    !READ_ONCE(my_rdp->nocb_gp_sleep));
+	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
+}
+
+
 /*
  * No-CBs GP kthreads come here to wait for additional callbacks to show up
  * or for grace periods to end.
@@ -701,13 +710,19 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
 		/* Polling, so trace if first poll in the series. */
 		if (gotcbs)
 			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
-		schedule_timeout_idle(1);
+		if (list_empty(&my_rdp->nocb_head_rdp)) {
+			raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
+			if (!my_rdp->nocb_toggling_rdp)
+				WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
+			raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
+			/* Wait for any offloading rdp */
+			rdp_gp_sleep(my_rdp, cpu);
+		} else {
+			schedule_timeout_idle(1);
+		}
 	} else if (!needwait_gp) {
 		/* Wait for callbacks to appear. */
-		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
-		swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
-				!READ_ONCE(my_rdp->nocb_gp_sleep));
-		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
+		rdp_gp_sleep(my_rdp, cpu);
 	} else {
 		rnp = my_rdp->mynode;
 		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
Zqiang June 11, 2022, 5:29 a.m. UTC | #8
On Tue, Jun 07, 2022 at 03:50:57PM +0800, Zqiang wrote:
> Currently, If the 'rcu_nocb_poll' bootargs is enable, all rcuog 
> kthreads enter polling mode. however, due to only insert CPU's rdp 
> which belong to rcu_nocb_mask to 'nocb_head_rdp' list or all CPU's rdp 
> served by rcuog kthread have been de-offloaded, these cause the 
> 'nocb_head_rdp' list served by rcuog kthread is empty, when the 
> 'nocb_head_rdp' is empty, the rcuog kthread in polling mode not 
> actually do anything. fix it by exiting polling mode when the 
> 'nocb_head_rdp'list is empty, otherwise entering polling mode.
> 
> Signed-off-by: Zqiang <qiang1.zhang@intel.com>
> ---
>  v1->v2:
>  Move rcu_nocb_poll flags check from rdp_offload_toggle() to  
> rcu_nocb_rdp_offload/deoffload(), avoid unnecessary setting of  
> rdp_gp->nocb_gp_sleep flags, because when rcu_nocb_poll is set  the 
> rdp_gp->nocb_gp_sleep is not used.
> 
>  kernel/rcu/tree_nocb.h | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 
> fa8e4f82e60c..2a52c9abc681 100644
> --- a/kernel/rcu/tree_nocb.h
> +++ b/kernel/rcu/tree_nocb.h
> @@ -698,10 +698,14 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
>  				   TPS("WakeBypassIsDeferred"));
>  	}
>  	if (rcu_nocb_poll) {
> -		/* Polling, so trace if first poll in the series. */
> -		if (gotcbs)
> -			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
> -		schedule_timeout_idle(1);
> +		if (list_empty(&my_rdp->nocb_head_rdp)) {
> +			rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));

>I suspect you have based your patch on upstream tree which doesn't seem to have this one yet:
>
>   "rcu/nocb: Add/del rdp to iterate from rcuog itself"
>
>With this patch you can't wait on changes to my_rdp->nocb_toggling_rdp because
>nocb_gp_wait() now performs the list_add/list_del itself.
>
>Please rebase your patch on top of latest rcu:dev from Paul's tree. Then all you need to do is to change the wait side, something like this (untested):

Hi Frederic  thanks for your advice.
This is similar to what I said in my previous email, this change is better, I will test it.

Thanks
Zqiang


>
>diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index fa8e4f82e60c..f36d6be4f372 100644
>--- a/kernel/rcu/tree_nocb.h
>+++ b/kernel/rcu/tree_nocb.h
>@@ -584,6 +584,15 @@ static int nocb_gp_toggle_rdp(struct rcu_data *rdp,
> 	return ret;
> }
> 
>+static void nocb_gp_sleep(struct rdp *my_rdp, int cpu) {
>+	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
>+	swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
>+					    !READ_ONCE(my_rdp->nocb_gp_sleep));
>+	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep")); }
>+
>+
> /*
>  * No-CBs GP kthreads come here to wait for additional callbacks to show up
>  * or for grace periods to end.
>@@ -701,13 +710,19 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
> 		/* Polling, so trace if first poll in the series. */
> 		if (gotcbs)
> 			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
>-		schedule_timeout_idle(1);
>+		if (list_empty(&my_rdp->nocb_head_rdp)) {
>+			raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
>+			if (!my_rdp->nocb_toggling_rdp)
>+				WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
>+			raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
>+			/* Wait for any offloading rdp */
>+			rdp_gp_sleep(my_rdp, cpu);
>+		} else {
>+			schedule_timeout_idle(1);
>+		}
> 	} else if (!needwait_gp) {
> 		/* Wait for callbacks to appear. */
>-		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
>-		swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
>-				!READ_ONCE(my_rdp->nocb_gp_sleep));
>-		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
>+		rdp_gp_sleep(my_rdp, cpu);
> 	} else {
> 		rnp = my_rdp->mynode;
>
> 		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
diff mbox series

Patch

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index fa8e4f82e60c..2a52c9abc681 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -698,10 +698,14 @@  static void nocb_gp_wait(struct rcu_data *my_rdp)
 				   TPS("WakeBypassIsDeferred"));
 	}
 	if (rcu_nocb_poll) {
-		/* Polling, so trace if first poll in the series. */
-		if (gotcbs)
-			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
-		schedule_timeout_idle(1);
+		if (list_empty(&my_rdp->nocb_head_rdp)) {
+			rcu_wait(READ_ONCE(my_rdp->nocb_toggling_rdp));
+		} else {
+			/* Polling, so trace if first poll in the series. */
+			if (gotcbs)
+				trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
+			schedule_timeout_idle(1);
+		}
 	} else if (!needwait_gp) {
 		/* Wait for callbacks to appear. */
 		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
@@ -1030,7 +1034,7 @@  static long rcu_nocb_rdp_deoffload(void *arg)
 
 	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
 	if (rdp_gp->nocb_gp_kthread) {
-		if (wake_gp)
+		if (wake_gp || rcu_nocb_poll)
 			wake_up_process(rdp_gp->nocb_gp_kthread);
 
 		/*
@@ -1152,7 +1156,7 @@  static long rcu_nocb_rdp_offload(void *arg)
 	 *      rcu_nocb_unlock()         rcu_nocb_unlock()
 	 */
 	wake_gp = rdp_offload_toggle(rdp, true, flags);
-	if (wake_gp)
+	if (wake_gp || rcu_nocb_poll)
 		wake_up_process(rdp_gp->nocb_gp_kthread);
 	swait_event_exclusive(rdp->nocb_state_wq,
 			      rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&