diff mbox series

srcu: switch work func to allow concurrent gp

Message ID 20221130083902.31577-1-kernelfans@gmail.com (mailing list archive)
State New, archived
Headers show
Series srcu: switch work func to allow concurrent gp | expand

Commit Message

Pingfan Liu Nov. 30, 2022, 8:39 a.m. UTC
ssp->srcu_cb_mutex is introduced to allow the next srcu state machine
to advance as soon as possible. But according to the implementation of
the workqueue, the same work_struct is serialized and in fact cannot
run concurrently.

Quoting from Documentation/core-api/workqueue.rst
"
Non-reentrance Conditions
=========================

Workqueue guarantees that a work item cannot be re-entrant if the following
conditions hold after a work item gets queued:

        1. The work function hasn't been changed.
        2. No one queues the work item to another workqueue.
        3. The work item hasn't been reinitiated.
"

To allow concurrency to some extent, this can be achieved by changing
the work function so as to break the conditions. As a result, when
srcu_gp_end() releases srcu_gp_mutex, a new state machine can begin.

Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
To: rcu@vger.kernel.org
---
 kernel/rcu/srcutree.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

Comments

Boqun Feng Nov. 30, 2022, 4:53 p.m. UTC | #1
On Wed, Nov 30, 2022 at 04:39:02PM +0800, Pingfan Liu wrote:
> ssp->srcu_cb_mutex is introduced to allow the other srcu state machine
> to advance as soon as possible. But according to the implement of
> workqueue, the same work_struct is serialized and can not run
> concurrently in fact.
> 
> Quoting from Documentation/core-api/workqueue.rst
> "
> Non-reentrance Conditions
> =========================
> 
> Workqueue guarantees that a work item cannot be re-entrant if the following
> conditions hold after a work item gets queued:
> 
>         1. The work function hasn't been changed.
>         2. No one queues the work item to another workqueue.
>         3. The work item hasn't been reinitiated.
> "
> 
> To allow the concurrence to some extent, it can be achieved by changing
> the work function to break the conditions. As a result, when
> srcu_gp_end() releases srcu_gp_mutex, a new state machine can begin.
> 
> Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
> Cc: Lai Jiangshan <jiangshanlai@gmail.com>
> Cc: "Paul E. McKenney" <paulmck@kernel.org>
> Cc: Frederic Weisbecker <frederic@kernel.org>
> Cc: Josh Triplett <josh@joshtriplett.org>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> To: rcu@vger.kernel.org
> ---
>  kernel/rcu/srcutree.c | 19 +++++++++++++++++++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> index 1c304fec89c0..56dd9bb2c8b8 100644
> --- a/kernel/rcu/srcutree.c
> +++ b/kernel/rcu/srcutree.c
> @@ -75,6 +75,7 @@ static bool __read_mostly srcu_init_done;
>  static void srcu_invoke_callbacks(struct work_struct *work);
>  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
>  static void process_srcu(struct work_struct *work);
> +static void process_srcu_wrap(struct work_struct *work);
>  static void srcu_delay_timer(struct timer_list *t);
>  
>  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> @@ -763,6 +764,11 @@ static void srcu_gp_end(struct srcu_struct *ssp)
>  		cbdelay = 0;
>  
>  	WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
> +	/* Change work func so work can be concurrent */
> +	if (ssp->work.work.func == process_srcu_wrap)
> +		ssp->work.work.func = process_srcu;
> +	else
> +		ssp->work.work.func = process_srcu_wrap;

This looks really hacky ;-) It would be good that workqueue has an API
to allow "resetting" a work.

Do you have any number of the potential performance improvement?

Regards,
Boqun

>  	rcu_seq_end(&ssp->srcu_gp_seq);
>  	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
>  	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
> @@ -1637,6 +1643,19 @@ static void process_srcu(struct work_struct *work)
>  	srcu_reschedule(ssp, curdelay);
>  }
>  
> +/*
> + * The ssp->work is expected to be concurrent to some extent, but the current
> + * workqueue does not support the concurrence on the same work. (Refer to the
> + * section "Non-reentrance Conditions" in the file workqueue.rst)
> + * Resolving it by changing the work func.
> + *
> + * Prevent compilering from optimizing out it.
> + */
> +static __used void process_srcu_wrap(struct work_struct *work)
> +{
> +	process_srcu(work);
> +}
> +
>  void srcutorture_get_gp_data(enum rcutorture_type test_type,
>  			     struct srcu_struct *ssp, int *flags,
>  			     unsigned long *gp_seq)
> -- 
> 2.31.1
>
Joel Fernandes Nov. 30, 2022, 5:39 p.m. UTC | #2
> On Nov 30, 2022, at 11:55 AM, Boqun Feng <boqun.feng@gmail.com> wrote:
> 
> On Wed, Nov 30, 2022 at 04:39:02PM +0800, Pingfan Liu wrote:
>> ssp->srcu_cb_mutex is introduced to allow the other srcu state machine
>> to advance as soon as possible. But according to the implement of
>> workqueue, the same work_struct is serialized and can not run
>> concurrently in fact.
>> 
>> Quoting from Documentation/core-api/workqueue.rst
>> "
>> Non-reentrance Conditions
>> =========================
>> 
>> Workqueue guarantees that a work item cannot be re-entrant if the following
>> conditions hold after a work item gets queued:
>> 
>>        1. The work function hasn't been changed.
>>        2. No one queues the work item to another workqueue.
>>        3. The work item hasn't been reinitiated.
>> "
>> 
>> To allow the concurrence to some extent, it can be achieved by changing
>> the work function to break the conditions. As a result, when
>> srcu_gp_end() releases srcu_gp_mutex, a new state machine can begin.
>> 
>> Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
>> Cc: Lai Jiangshan <jiangshanlai@gmail.com>
>> Cc: "Paul E. McKenney" <paulmck@kernel.org>
>> Cc: Frederic Weisbecker <frederic@kernel.org>
>> Cc: Josh Triplett <josh@joshtriplett.org>
>> Cc: Steven Rostedt <rostedt@goodmis.org>
>> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>> Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
>> To: rcu@vger.kernel.org
>> ---
>> kernel/rcu/srcutree.c | 19 +++++++++++++++++++
>> 1 file changed, 19 insertions(+)
>> 
>> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
>> index 1c304fec89c0..56dd9bb2c8b8 100644
>> --- a/kernel/rcu/srcutree.c
>> +++ b/kernel/rcu/srcutree.c
>> @@ -75,6 +75,7 @@ static bool __read_mostly srcu_init_done;
>> static void srcu_invoke_callbacks(struct work_struct *work);
>> static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
>> static void process_srcu(struct work_struct *work);
>> +static void process_srcu_wrap(struct work_struct *work);
>> static void srcu_delay_timer(struct timer_list *t);
>> 
>> /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
>> @@ -763,6 +764,11 @@ static void srcu_gp_end(struct srcu_struct *ssp)
>>        cbdelay = 0;
>> 
>>    WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
>> +    /* Change work func so work can be concurrent */
>> +    if (ssp->work.work.func == process_srcu_wrap)
>> +        ssp->work.work.func = process_srcu;
>> +    else
>> +        ssp->work.work.func = process_srcu_wrap;
> 
> This looks really hacky ;-) It would be good that workqueue has an API
> to allow "resetting" a work.
> 
> Do you have any number of the potential performance improvement?


Agreed. Another question - have you verified the workqueue behavior after this change (say using tracing), regardless of what the workqueue documentation says?

Thanks,

- Joel 



> 
> Regards,
> Boqun
> 
>>    rcu_seq_end(&ssp->srcu_gp_seq);
>>    gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
>>    if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
>> @@ -1637,6 +1643,19 @@ static void process_srcu(struct work_struct *work)
>>    srcu_reschedule(ssp, curdelay);
>> }
>> 
>> +/*
>> + * The ssp->work is expected to be concurrent to some extent, but the current
>> + * workqueue does not support the concurrence on the same work. (Refer to the
>> + * section "Non-reentrance Conditions" in the file workqueue.rst)
>> + * Resolving it by changing the work func.
>> + *
>> + * Prevent compilering from optimizing out it.
>> + */
>> +static __used void process_srcu_wrap(struct work_struct *work)
>> +{
>> +    process_srcu(work);
>> +}
>> +
>> void srcutorture_get_gp_data(enum rcutorture_type test_type,
>>                 struct srcu_struct *ssp, int *flags,
>>                 unsigned long *gp_seq)
>> -- 
>> 2.31.1
>>
Pingfan Liu Dec. 1, 2022, 1:53 p.m. UTC | #3
On Wed, Nov 30, 2022 at 08:53:43AM -0800, Boqun Feng wrote:
> On Wed, Nov 30, 2022 at 04:39:02PM +0800, Pingfan Liu wrote:
> > ssp->srcu_cb_mutex is introduced to allow the other srcu state machine
> > to advance as soon as possible. But according to the implement of
> > workqueue, the same work_struct is serialized and can not run
> > concurrently in fact.
> > 
> > Quoting from Documentation/core-api/workqueue.rst
> > "
> > Non-reentrance Conditions
> > =========================
> > 
> > Workqueue guarantees that a work item cannot be re-entrant if the following
> > conditions hold after a work item gets queued:
> > 
> >         1. The work function hasn't been changed.
> >         2. No one queues the work item to another workqueue.
> >         3. The work item hasn't been reinitiated.
> > "
> > 
> > To allow the concurrence to some extent, it can be achieved by changing
> > the work function to break the conditions. As a result, when
> > srcu_gp_end() releases srcu_gp_mutex, a new state machine can begin.
> > 
> > Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
> > Cc: Lai Jiangshan <jiangshanlai@gmail.com>
> > Cc: "Paul E. McKenney" <paulmck@kernel.org>
> > Cc: Frederic Weisbecker <frederic@kernel.org>
> > Cc: Josh Triplett <josh@joshtriplett.org>
> > Cc: Steven Rostedt <rostedt@goodmis.org>
> > Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> > To: rcu@vger.kernel.org
> > ---
> >  kernel/rcu/srcutree.c | 19 +++++++++++++++++++
> >  1 file changed, 19 insertions(+)
> > 
> > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > index 1c304fec89c0..56dd9bb2c8b8 100644
> > --- a/kernel/rcu/srcutree.c
> > +++ b/kernel/rcu/srcutree.c
> > @@ -75,6 +75,7 @@ static bool __read_mostly srcu_init_done;
> >  static void srcu_invoke_callbacks(struct work_struct *work);
> >  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
> >  static void process_srcu(struct work_struct *work);
> > +static void process_srcu_wrap(struct work_struct *work);
> >  static void srcu_delay_timer(struct timer_list *t);
> >  
> >  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> > @@ -763,6 +764,11 @@ static void srcu_gp_end(struct srcu_struct *ssp)
> >  		cbdelay = 0;
> >  
> >  	WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
> > +	/* Change work func so work can be concurrent */
> > +	if (ssp->work.work.func == process_srcu_wrap)
> > +		ssp->work.work.func = process_srcu;
> > +	else
> > +		ssp->work.work.func = process_srcu_wrap;
> 
> This looks really hacky ;-) It would be good that workqueue has an API
> to allow "resetting" a work.
> 

Indeed, it is hacky, and it could instead be done by using an alternative
work_struct so that, from the workqueue API's point of view, the work item
stays intact.

> Do you have any number of the potential performance improvement?
> 

I will try to come up with one. But the result may heavily depend on the
test case.


Thanks,

	Pingfan

> Regards,
> Boqun
> 
> >  	rcu_seq_end(&ssp->srcu_gp_seq);
> >  	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
> >  	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
> > @@ -1637,6 +1643,19 @@ static void process_srcu(struct work_struct *work)
> >  	srcu_reschedule(ssp, curdelay);
> >  }
> >  
> > +/*
> > + * The ssp->work is expected to be concurrent to some extent, but the current
> > + * workqueue does not support the concurrence on the same work. (Refer to the
> > + * section "Non-reentrance Conditions" in the file workqueue.rst)
> > + * Resolving it by changing the work func.
> > + *
> > + * Prevent compilering from optimizing out it.
> > + */
> > +static __used void process_srcu_wrap(struct work_struct *work)
> > +{
> > +	process_srcu(work);
> > +}
> > +
> >  void srcutorture_get_gp_data(enum rcutorture_type test_type,
> >  			     struct srcu_struct *ssp, int *flags,
> >  			     unsigned long *gp_seq)
> > -- 
> > 2.31.1
> >
Pingfan Liu Dec. 1, 2022, 1:54 p.m. UTC | #4
On Wed, Nov 30, 2022 at 12:39:53PM -0500, Joel Fernandes wrote:
> 
> 
> > On Nov 30, 2022, at 11:55 AM, Boqun Feng <boqun.feng@gmail.com> wrote:
> > 
> > On Wed, Nov 30, 2022 at 04:39:02PM +0800, Pingfan Liu wrote:
> >> ssp->srcu_cb_mutex is introduced to allow the other srcu state machine
> >> to advance as soon as possible. But according to the implement of
> >> workqueue, the same work_struct is serialized and can not run
> >> concurrently in fact.
> >> 
> >> Quoting from Documentation/core-api/workqueue.rst
> >> "
> >> Non-reentrance Conditions
> >> =========================
> >> 
> >> Workqueue guarantees that a work item cannot be re-entrant if the following
> >> conditions hold after a work item gets queued:
> >> 
> >>        1. The work function hasn't been changed.
> >>        2. No one queues the work item to another workqueue.
> >>        3. The work item hasn't been reinitiated.
> >> "
> >> 
> >> To allow the concurrence to some extent, it can be achieved by changing
> >> the work function to break the conditions. As a result, when
> >> srcu_gp_end() releases srcu_gp_mutex, a new state machine can begin.
> >> 
> >> Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
> >> Cc: Lai Jiangshan <jiangshanlai@gmail.com>
> >> Cc: "Paul E. McKenney" <paulmck@kernel.org>
> >> Cc: Frederic Weisbecker <frederic@kernel.org>
> >> Cc: Josh Triplett <josh@joshtriplett.org>
> >> Cc: Steven Rostedt <rostedt@goodmis.org>
> >> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> >> Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> >> To: rcu@vger.kernel.org
> >> ---
> >> kernel/rcu/srcutree.c | 19 +++++++++++++++++++
> >> 1 file changed, 19 insertions(+)
> >> 
> >> diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> >> index 1c304fec89c0..56dd9bb2c8b8 100644
> >> --- a/kernel/rcu/srcutree.c
> >> +++ b/kernel/rcu/srcutree.c
> >> @@ -75,6 +75,7 @@ static bool __read_mostly srcu_init_done;
> >> static void srcu_invoke_callbacks(struct work_struct *work);
> >> static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
> >> static void process_srcu(struct work_struct *work);
> >> +static void process_srcu_wrap(struct work_struct *work);
> >> static void srcu_delay_timer(struct timer_list *t);
> >> 
> >> /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> >> @@ -763,6 +764,11 @@ static void srcu_gp_end(struct srcu_struct *ssp)
> >>        cbdelay = 0;
> >> 
> >>    WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
> >> +    /* Change work func so work can be concurrent */
> >> +    if (ssp->work.work.func == process_srcu_wrap)
> >> +        ssp->work.work.func = process_srcu;
> >> +    else
> >> +        ssp->work.work.func = process_srcu_wrap;
> > 
> > This looks really hacky ;-) It would be good that workqueue has an API
> > to allow "resetting" a work.
> > 
> > Do you have any number of the potential performance improvement?
> 
> 
> Agreed. Another question - have you verified the workqueue behavior after this change (say using tracing), regardless of what the workqueue documentation says?
> 

I will try to answer this question after figuring out a test case.

Thanks,

	Pingfan

> Thanks,
> 
> - Joel 
> 
> 
> 
> > 
> > Regards,
> > Boqun
> > 
> >>    rcu_seq_end(&ssp->srcu_gp_seq);
> >>    gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
> >>    if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
> >> @@ -1637,6 +1643,19 @@ static void process_srcu(struct work_struct *work)
> >>    srcu_reschedule(ssp, curdelay);
> >> }
> >> 
> >> +/*
> >> + * The ssp->work is expected to be concurrent to some extent, but the current
> >> + * workqueue does not support the concurrence on the same work. (Refer to the
> >> + * section "Non-reentrance Conditions" in the file workqueue.rst)
> >> + * Resolving it by changing the work func.
> >> + *
> >> + * Prevent compilering from optimizing out it.
> >> + */
> >> +static __used void process_srcu_wrap(struct work_struct *work)
> >> +{
> >> +    process_srcu(work);
> >> +}
> >> +
> >> void srcutorture_get_gp_data(enum rcutorture_type test_type,
> >>                 struct srcu_struct *ssp, int *flags,
> >>                 unsigned long *gp_seq)
> >> -- 
> >> 2.31.1
> >>
Pingfan Liu Dec. 13, 2022, 1 p.m. UTC | #5
On Thu, Dec 01, 2022 at 09:53:36PM +0800, Pingfan Liu wrote:
> On Wed, Nov 30, 2022 at 08:53:43AM -0800, Boqun Feng wrote:
> > On Wed, Nov 30, 2022 at 04:39:02PM +0800, Pingfan Liu wrote:
> > > ssp->srcu_cb_mutex is introduced to allow the other srcu state machine
> > > to advance as soon as possible. But according to the implement of
> > > workqueue, the same work_struct is serialized and can not run
> > > concurrently in fact.
> > > 
> > > Quoting from Documentation/core-api/workqueue.rst
> > > "
> > > Non-reentrance Conditions
> > > =========================
> > > 
> > > Workqueue guarantees that a work item cannot be re-entrant if the following
> > > conditions hold after a work item gets queued:
> > > 
> > >         1. The work function hasn't been changed.
> > >         2. No one queues the work item to another workqueue.
> > >         3. The work item hasn't been reinitiated.
> > > "
> > > 
> > > To allow the concurrence to some extent, it can be achieved by changing
> > > the work function to break the conditions. As a result, when
> > > srcu_gp_end() releases srcu_gp_mutex, a new state machine can begin.
> > > 
> > > Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
> > > Cc: Lai Jiangshan <jiangshanlai@gmail.com>
> > > Cc: "Paul E. McKenney" <paulmck@kernel.org>
> > > Cc: Frederic Weisbecker <frederic@kernel.org>
> > > Cc: Josh Triplett <josh@joshtriplett.org>
> > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > > Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> > > To: rcu@vger.kernel.org
> > > ---
> > >  kernel/rcu/srcutree.c | 19 +++++++++++++++++++
> > >  1 file changed, 19 insertions(+)
> > > 
> > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > index 1c304fec89c0..56dd9bb2c8b8 100644
> > > --- a/kernel/rcu/srcutree.c
> > > +++ b/kernel/rcu/srcutree.c
> > > @@ -75,6 +75,7 @@ static bool __read_mostly srcu_init_done;
> > >  static void srcu_invoke_callbacks(struct work_struct *work);
> > >  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
> > >  static void process_srcu(struct work_struct *work);
> > > +static void process_srcu_wrap(struct work_struct *work);
> > >  static void srcu_delay_timer(struct timer_list *t);
> > >  
> > >  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> > > @@ -763,6 +764,11 @@ static void srcu_gp_end(struct srcu_struct *ssp)
> > >  		cbdelay = 0;
> > >  
> > >  	WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
> > > +	/* Change work func so work can be concurrent */
> > > +	if (ssp->work.work.func == process_srcu_wrap)
> > > +		ssp->work.work.func = process_srcu;
> > > +	else
> > > +		ssp->work.work.func = process_srcu_wrap;
> > 
> > This looks really hacky ;-) It would be good that workqueue has an API
> > to allow "resetting" a work.
> > 
> 
> Indeed, it is hacky and it can be done by using alternative work_struct
> so that from the workqueue API, it is intact.
> 
> > Do you have any number of the potential performance improvement?
> > 
> 
> I will try to bring out one. But the result may heavily depend on the
> test case.
> 

Sorry for the late update; I was interrupted by something else.

I used two machine to test

-1. on 144 cpus hpe-dl560gen10

modprobe rcutorture   torture_type=srcud  fwd_progress=144 fwd_progress_holdoff=1 shutdown_secs=36000 stat_interval=60 verbose=1

# no patch
[37587.632155] srcud:  End-test grace-period state: g28287244 f0x0 total-gps=28287244
#my patch
[36443.468017] srcud:  End-test grace-period state: g29026056 f0x0 total-gps=29026056


-2. on 256 cpus amd-milan
modprobe rcutorture   torture_type=srcud  fwd_progress=256 fwd_progress_holdoff=1 shutdown_secs=36000 stat_interval=60 verbose=1

# no patch
[36093.605732] srcud:  End-test grace-period state: g10850284 f0x0 total-gps=10850284
#my patch
[36093.856713] srcud:  End-test grace-period state: g10672632 f0x0 total-gps=10672632


The first test shows that it has about 2.6% improvement, while the
second test shows it has no significant effect.


I wonder if any test case can be in favour of my patch. Any suggestions?


Thanks,

	Pingfan

> Thanks,
> 
> 	Pingfan
> 
> > Regards,
> > Boqun
> > 
> > >  	rcu_seq_end(&ssp->srcu_gp_seq);
> > >  	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
> > >  	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
> > > @@ -1637,6 +1643,19 @@ static void process_srcu(struct work_struct *work)
> > >  	srcu_reschedule(ssp, curdelay);
> > >  }
> > >  
> > > +/*
> > > + * The ssp->work is expected to be concurrent to some extent, but the current
> > > + * workqueue does not support the concurrence on the same work. (Refer to the
> > > + * section "Non-reentrance Conditions" in the file workqueue.rst)
> > > + * Resolving it by changing the work func.
> > > + *
> > > + * Prevent compilering from optimizing out it.
> > > + */
> > > +static __used void process_srcu_wrap(struct work_struct *work)
> > > +{
> > > +	process_srcu(work);
> > > +}
> > > +
> > >  void srcutorture_get_gp_data(enum rcutorture_type test_type,
> > >  			     struct srcu_struct *ssp, int *flags,
> > >  			     unsigned long *gp_seq)
> > > -- 
> > > 2.31.1
> > >
Boqun Feng Dec. 13, 2022, 7:51 p.m. UTC | #6
On Tue, Dec 13, 2022 at 09:00:30PM +0800, Pingfan Liu wrote:
> On Thu, Dec 01, 2022 at 09:53:36PM +0800, Pingfan Liu wrote:
> > On Wed, Nov 30, 2022 at 08:53:43AM -0800, Boqun Feng wrote:
> > > On Wed, Nov 30, 2022 at 04:39:02PM +0800, Pingfan Liu wrote:
> > > > ssp->srcu_cb_mutex is introduced to allow the other srcu state machine
> > > > to advance as soon as possible. But according to the implement of
> > > > workqueue, the same work_struct is serialized and can not run
> > > > concurrently in fact.
> > > > 
> > > > Quoting from Documentation/core-api/workqueue.rst
> > > > "
> > > > Non-reentrance Conditions
> > > > =========================
> > > > 
> > > > Workqueue guarantees that a work item cannot be re-entrant if the following
> > > > conditions hold after a work item gets queued:
> > > > 
> > > >         1. The work function hasn't been changed.
> > > >         2. No one queues the work item to another workqueue.
> > > >         3. The work item hasn't been reinitiated.
> > > > "
> > > > 
> > > > To allow the concurrence to some extent, it can be achieved by changing
> > > > the work function to break the conditions. As a result, when
> > > > srcu_gp_end() releases srcu_gp_mutex, a new state machine can begin.
> > > > 
> > > > Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
> > > > Cc: Lai Jiangshan <jiangshanlai@gmail.com>
> > > > Cc: "Paul E. McKenney" <paulmck@kernel.org>
> > > > Cc: Frederic Weisbecker <frederic@kernel.org>
> > > > Cc: Josh Triplett <josh@joshtriplett.org>
> > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > > > Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> > > > To: rcu@vger.kernel.org
> > > > ---
> > > >  kernel/rcu/srcutree.c | 19 +++++++++++++++++++
> > > >  1 file changed, 19 insertions(+)
> > > > 
> > > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > > index 1c304fec89c0..56dd9bb2c8b8 100644
> > > > --- a/kernel/rcu/srcutree.c
> > > > +++ b/kernel/rcu/srcutree.c
> > > > @@ -75,6 +75,7 @@ static bool __read_mostly srcu_init_done;
> > > >  static void srcu_invoke_callbacks(struct work_struct *work);
> > > >  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
> > > >  static void process_srcu(struct work_struct *work);
> > > > +static void process_srcu_wrap(struct work_struct *work);
> > > >  static void srcu_delay_timer(struct timer_list *t);
> > > >  
> > > >  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> > > > @@ -763,6 +764,11 @@ static void srcu_gp_end(struct srcu_struct *ssp)
> > > >  		cbdelay = 0;
> > > >  
> > > >  	WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
> > > > +	/* Change work func so work can be concurrent */
> > > > +	if (ssp->work.work.func == process_srcu_wrap)
> > > > +		ssp->work.work.func = process_srcu;
> > > > +	else
> > > > +		ssp->work.work.func = process_srcu_wrap;
> > > 
> > > This looks really hacky ;-) It would be good that workqueue has an API
> > > to allow "resetting" a work.
> > > 
> > 
> > Indeed, it is hacky and it can be done by using alternative work_struct
> > so that from the workqueue API, it is intact.
> > 
> > > Do you have any number of the potential performance improvement?
> > > 
> > 
> > I will try to bring out one. But the result may heavily depend on the
> > test case.
> > 
> 
> Sorry to update late, I was interrupted by something else.

No worries, thanks for looking into it.

> 
> I used two machine to test
> 
> -1. on 144 cpus hpe-dl560gen10
> 
> modprobe rcutorture   torture_type=srcud  fwd_progress=144 fwd_progress_holdoff=1 shutdown_secs=36000 stat_interval=60 verbose=1
> 
> # no patch
> [37587.632155] srcud:  End-test grace-period state: g28287244 f0x0 total-gps=28287244
> #my patch
> [36443.468017] srcud:  End-test grace-period state: g29026056 f0x0 total-gps=29026056
> 
> 
> -2. on 256 cpus amd-milan
> modprobe rcutorture   torture_type=srcud  fwd_progress=256 fwd_progress_holdoff=1 shutdown_secs=36000 stat_interval=60 verbose=1
> 
> # no patch
> [36093.605732] srcud:  End-test grace-period state: g10850284 f0x0 total-gps=10850284
> #my patch
> [36093.856713] srcud:  End-test grace-period state: g10672632 f0x0 total-gps=10672632
> 
> 
> The first test shows that it has about 2.6% improvement, while the
> second test shows it has no significant effect.
> 
> 
> I wonder if any test case can be in flavour of my patch. Any suggestion?
> 

I was actually waiting for you to tell me that ;-) Your trick clearly can
improve the degree of parallelism a bit, but IIUC you need srcu_end_gp() to
run at the same time as the srcu work function (i.e. with the pending bit
cleared) to achieve that. And that's really a small time window ;-)

And there's always the Amdahl's law: do we know whether the state
machine processing of SRCU is the bottleneck in any workload?

And are we willing to give CPU time to SRCU state machine processing
instead of anything else?

My suggestion is to keep this patch somewhere, and whenever you see a
related problem, try the patch and see whether it helps ;-)

Regards,
Boqun

> 
> Thanks,
> 
> 	Pingfan
> 
> > Thanks,
> > 
> > 	Pingfan
> > 
> > > Regards,
> > > Boqun
> > > 
> > > >  	rcu_seq_end(&ssp->srcu_gp_seq);
> > > >  	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
> > > >  	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
> > > > @@ -1637,6 +1643,19 @@ static void process_srcu(struct work_struct *work)
> > > >  	srcu_reschedule(ssp, curdelay);
> > > >  }
> > > >  
> > > > +/*
> > > > + * The ssp->work is expected to be concurrent to some extent, but the current
> > > > + * workqueue does not support the concurrence on the same work. (Refer to the
> > > > + * section "Non-reentrance Conditions" in the file workqueue.rst)
> > > > + * Resolving it by changing the work func.
> > > > + *
> > > > + * Prevent compilering from optimizing out it.
> > > > + */
> > > > +static __used void process_srcu_wrap(struct work_struct *work)
> > > > +{
> > > > +	process_srcu(work);
> > > > +}
> > > > +
> > > >  void srcutorture_get_gp_data(enum rcutorture_type test_type,
> > > >  			     struct srcu_struct *ssp, int *flags,
> > > >  			     unsigned long *gp_seq)
> > > > -- 
> > > > 2.31.1
> > > >
Pingfan Liu Dec. 14, 2022, 12:34 p.m. UTC | #7
On Tue, Dec 13, 2022 at 11:51:21AM -0800, Boqun Feng wrote:
> On Tue, Dec 13, 2022 at 09:00:30PM +0800, Pingfan Liu wrote:
> > On Thu, Dec 01, 2022 at 09:53:36PM +0800, Pingfan Liu wrote:
> > > On Wed, Nov 30, 2022 at 08:53:43AM -0800, Boqun Feng wrote:
> > > > On Wed, Nov 30, 2022 at 04:39:02PM +0800, Pingfan Liu wrote:
> > > > > ssp->srcu_cb_mutex is introduced to allow the other srcu state machine
> > > > > to advance as soon as possible. But according to the implement of
> > > > > workqueue, the same work_struct is serialized and can not run
> > > > > concurrently in fact.
> > > > > 
> > > > > Quoting from Documentation/core-api/workqueue.rst
> > > > > "
> > > > > Non-reentrance Conditions
> > > > > =========================
> > > > > 
> > > > > Workqueue guarantees that a work item cannot be re-entrant if the following
> > > > > conditions hold after a work item gets queued:
> > > > > 
> > > > >         1. The work function hasn't been changed.
> > > > >         2. No one queues the work item to another workqueue.
> > > > >         3. The work item hasn't been reinitiated.
> > > > > "
> > > > > 
> > > > > To allow the concurrence to some extent, it can be achieved by changing
> > > > > the work function to break the conditions. As a result, when
> > > > > srcu_gp_end() releases srcu_gp_mutex, a new state machine can begin.
> > > > > 
> > > > > Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
> > > > > Cc: Lai Jiangshan <jiangshanlai@gmail.com>
> > > > > Cc: "Paul E. McKenney" <paulmck@kernel.org>
> > > > > Cc: Frederic Weisbecker <frederic@kernel.org>
> > > > > Cc: Josh Triplett <josh@joshtriplett.org>
> > > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > > Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> > > > > Cc: "Zhang, Qiang1" <qiang1.zhang@intel.com>
> > > > > To: rcu@vger.kernel.org
> > > > > ---
> > > > >  kernel/rcu/srcutree.c | 19 +++++++++++++++++++
> > > > >  1 file changed, 19 insertions(+)
> > > > > 
> > > > > diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
> > > > > index 1c304fec89c0..56dd9bb2c8b8 100644
> > > > > --- a/kernel/rcu/srcutree.c
> > > > > +++ b/kernel/rcu/srcutree.c
> > > > > @@ -75,6 +75,7 @@ static bool __read_mostly srcu_init_done;
> > > > >  static void srcu_invoke_callbacks(struct work_struct *work);
> > > > >  static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
> > > > >  static void process_srcu(struct work_struct *work);
> > > > > +static void process_srcu_wrap(struct work_struct *work);
> > > > >  static void srcu_delay_timer(struct timer_list *t);
> > > > >  
> > > > >  /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
> > > > > @@ -763,6 +764,11 @@ static void srcu_gp_end(struct srcu_struct *ssp)
> > > > >  		cbdelay = 0;
> > > > >  
> > > > >  	WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
> > > > > +	/* Change work func so work can be concurrent */
> > > > > +	if (ssp->work.work.func == process_srcu_wrap)
> > > > > +		ssp->work.work.func = process_srcu;
> > > > > +	else
> > > > > +		ssp->work.work.func = process_srcu_wrap;
> > > > 
> > > > This looks really hacky ;-) It would be good that workqueue has an API
> > > > to allow "resetting" a work.
> > > > 
> > > 
> > > Indeed, it is hacky and it can be done by using alternative work_struct
> > > so that from the workqueue API, it is intact.
> > > 
> > > > Do you have any number of the potential performance improvement?
> > > > 
> > > 
> > > I will try to bring out one. But the result may heavily depend on the
> > > test case.
> > > 
> > 
> > Sorry to update late, I was interrupted by something else.
> 
> No worries, thanks for looking into it.
> 
> > 
> > I used two machine to test
> > 
> > -1. on 144 cpus hpe-dl560gen10
> > 
> > modprobe rcutorture   torture_type=srcud  fwd_progress=144 fwd_progress_holdoff=1 shutdown_secs=36000 stat_interval=60 verbose=1
> > 
> > # no patch
> > [37587.632155] srcud:  End-test grace-period state: g28287244 f0x0 total-gps=28287244
> > #my patch
> > [36443.468017] srcud:  End-test grace-period state: g29026056 f0x0 total-gps=29026056
> > 
> > 
> > -2. on 256 cpus amd-milan
> > modprobe rcutorture   torture_type=srcud  fwd_progress=256 fwd_progress_holdoff=1 shutdown_secs=36000 stat_interval=60 verbose=1
> > 
> > # no patch
> > [36093.605732] srcud:  End-test grace-period state: g10850284 f0x0 total-gps=10850284
> > #my patch
> > [36093.856713] srcud:  End-test grace-period state: g10672632 f0x0 total-gps=10672632
> > 
> > 
> > The first test shows that it has about 2.6% improvement, while the
> > second test shows it has no significant effect.
> > 
> > 
> > I wonder if any test case can be in flavour of my patch. Any suggestion?
> > 
> 
> I actually wait you to tell me that ;-) Your trick clearly can improve
> the degree of parallel a bit, but IIUC you need that srcu_end_gp() runs
> at the same time with the srcu work function (i.e. pending bit is
> cleared) to archieve that. And that's really a small time window ;-)
> 

Yes, it is smaller than I expected.

> And there's always the Amdahl's law: do we know whether the state
> machine processing of SRCU is the bottleneck in any workload?
> 
> And are we willing to give CPU time to SRCU state machine processing
> instead of anything else?
> 
> My suggestion is to keep this patch somewhere, and whenever you see a
> related problem, try the patch and see whether it helps ;-)
> 

Yes. Thanks for your suggestion.

Again, appreciate for your help and advice.

Regards,

	Pingfan

> Regards,
> Boqun
> 
> > 
> > Thanks,
> > 
> > 	Pingfan
> > 
> > > Thanks,
> > > 
> > > 	Pingfan
> > > 
> > > > Regards,
> > > > Boqun
> > > > 
> > > > >  	rcu_seq_end(&ssp->srcu_gp_seq);
> > > > >  	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
> > > > >  	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
> > > > > @@ -1637,6 +1643,19 @@ static void process_srcu(struct work_struct *work)
> > > > >  	srcu_reschedule(ssp, curdelay);
> > > > >  }
> > > > >  
> > > > > +/*
> > > > > + * The ssp->work is expected to be concurrent to some extent, but the current
> > > > > + * workqueue does not support the concurrence on the same work. (Refer to the
> > > > > + * section "Non-reentrance Conditions" in the file workqueue.rst)
> > > > > + * Resolving it by changing the work func.
> > > > > + *
> > > > > + * Prevent compilering from optimizing out it.
> > > > > + */
> > > > > +static __used void process_srcu_wrap(struct work_struct *work)
> > > > > +{
> > > > > +	process_srcu(work);
> > > > > +}
> > > > > +
> > > > >  void srcutorture_get_gp_data(enum rcutorture_type test_type,
> > > > >  			     struct srcu_struct *ssp, int *flags,
> > > > >  			     unsigned long *gp_seq)
> > > > > -- 
> > > > > 2.31.1
> > > > >
diff mbox series

Patch

diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 1c304fec89c0..56dd9bb2c8b8 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -75,6 +75,7 @@  static bool __read_mostly srcu_init_done;
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
 static void process_srcu(struct work_struct *work);
+static void process_srcu_wrap(struct work_struct *work);
 static void srcu_delay_timer(struct timer_list *t);
 
 /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
@@ -763,6 +764,11 @@  static void srcu_gp_end(struct srcu_struct *ssp)
 		cbdelay = 0;
 
 	WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
+	/* Change work func so work can be concurrent */
+	if (ssp->work.work.func == process_srcu_wrap)
+		ssp->work.work.func = process_srcu;
+	else
+		ssp->work.work.func = process_srcu_wrap;
 	rcu_seq_end(&ssp->srcu_gp_seq);
 	gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
 	if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq))
@@ -1637,6 +1643,19 @@  static void process_srcu(struct work_struct *work)
 	srcu_reschedule(ssp, curdelay);
 }
 
+/*
+ * The ssp->work is expected to be concurrent to some extent, but the current
+ * workqueue does not support the concurrence on the same work. (Refer to the
+ * section "Non-reentrance Conditions" in the file workqueue.rst)
+ * Resolving it by changing the work func.
+ *
+ * Prevent compilering from optimizing out it.
+ */
+static __used void process_srcu_wrap(struct work_struct *work)
+{
+	process_srcu(work);
+}
+
 void srcutorture_get_gp_data(enum rcutorture_type test_type,
 			     struct srcu_struct *ssp, int *flags,
 			     unsigned long *gp_seq)