diff mbox series

[01/12] SUNRPC: make rqst_should_sleep() idempotent

Message ID 20230731064839.7729-2-neilb@suse.de (mailing list archive)
State New, archived
Headers show
Series SUNRPC: various thread management improvements | expand

Commit Message

NeilBrown July 31, 2023, 6:48 a.m. UTC
Based on its name you would think that rqst_should_sleep() would be
read-only, not changing anything.  But in fact it will clear
SP_TASK_PENDING if that was set.  This is surprising, and it blurs the
line between "check for work to do" and "dequeue work to do".

So change the "test_and_clear" to simple "test" and clear the bit once
the thread has decided to wake up and return to the caller.

With this, it makes sense to *always* set SP_TASK_PENDING when asked,
rather than only to set it if no thread could be woken up.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 net/sunrpc/svc_xprt.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

Comments

Chuck Lever July 31, 2023, 2:21 p.m. UTC | #1
On Mon, Jul 31, 2023 at 04:48:28PM +1000, NeilBrown wrote:
> Based on its name you would think that rqst_should_sleep() would be
> read-only, not changing anything.  But in fact it will clear
> SP_TASK_PENDING if that was set.  This is surprising, and it blurs the
> line between "check for work to do" and "dequeue work to do".

I agree that rqst_should_sleep() sounds like it should be a
predicate without side effects.


> So change the "test_and_clear" to simple "test" and clear the bit once
> the thread has decided to wake up and return to the caller.
> 
> With this, it makes sense to *always* set SP_TASK_PENDING when asked,
> rather than only to set it if no thread could be woken up.

I'm lost here. Why does always setting TASK_PENDING now make sense?
If there's no task pending, won't this trigger a wake up when there
is nothing to do?


> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  net/sunrpc/svc_xprt.c | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index cd92cb54132d..380fb3caea4c 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -581,8 +581,8 @@ void svc_wake_up(struct svc_serv *serv)
>  {
>  	struct svc_pool *pool = &serv->sv_pools[0];
>  
> -	if (!svc_pool_wake_idle_thread(serv, pool))
> -		set_bit(SP_TASK_PENDING, &pool->sp_flags);
> +	set_bit(SP_TASK_PENDING, &pool->sp_flags);
> +	svc_pool_wake_idle_thread(serv, pool);
>  }
>  EXPORT_SYMBOL_GPL(svc_wake_up);
>  
> @@ -704,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
>  	struct svc_pool		*pool = rqstp->rq_pool;
>  
>  	/* did someone call svc_wake_up? */
> -	if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
> +	if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
>  		return false;
>  
>  	/* was a socket queued? */
> @@ -750,6 +750,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
>  
>  	set_bit(RQ_BUSY, &rqstp->rq_flags);
>  	smp_mb__after_atomic();
> +	clear_bit(SP_TASK_PENDING, &pool->sp_flags);

Why wouldn't this go before the smp_mb__after_atomic()?


>  	rqstp->rq_xprt = svc_xprt_dequeue(pool);
>  	if (rqstp->rq_xprt) {
>  		trace_svc_pool_awoken(rqstp);
> @@ -761,6 +762,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
>  	percpu_counter_inc(&pool->sp_threads_no_work);
>  	return NULL;
>  out_found:
> +	clear_bit(SP_TASK_PENDING, &pool->sp_flags);

clear_bit_unlock ?

>  	/* Normally we will wait up to 5 seconds for any required
>  	 * cache information to be provided.
>  	 */
> -- 
> 2.40.1
>
Jeff Layton July 31, 2023, 2:33 p.m. UTC | #2
On Mon, 2023-07-31 at 16:48 +1000, NeilBrown wrote:
> Based on its name you would think that rqst_should_sleep() would be
> read-only, not changing anything.  But in fact it will clear
> SP_TASK_PENDING if that was set.  This is surprising, and it blurs the
> line between "check for work to do" and "dequeue work to do".
> 
> So change the "test_and_clear" to simple "test" and clear the bit once
> the thread has decided to wake up and return to the caller.
> 
> With this, it makes sense to *always* set SP_TASK_PENDING when asked,
> rather than only to set it if no thread could be woken up.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  net/sunrpc/svc_xprt.c | 8 +++++---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index cd92cb54132d..380fb3caea4c 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -581,8 +581,8 @@ void svc_wake_up(struct svc_serv *serv)
>  {
>  	struct svc_pool *pool = &serv->sv_pools[0];
>  
> -	if (!svc_pool_wake_idle_thread(serv, pool))
> -		set_bit(SP_TASK_PENDING, &pool->sp_flags);
> +	set_bit(SP_TASK_PENDING, &pool->sp_flags);
> +	svc_pool_wake_idle_thread(serv, pool);
>  }
>  EXPORT_SYMBOL_GPL(svc_wake_up);
>  
> @@ -704,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
>  	struct svc_pool		*pool = rqstp->rq_pool;
>  
>  	/* did someone call svc_wake_up? */
> -	if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
> +	if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
>  		return false;
>  
>  	/* was a socket queued? */
> @@ -750,6 +750,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
>  
>  	set_bit(RQ_BUSY, &rqstp->rq_flags);
>  	smp_mb__after_atomic();
> +	clear_bit(SP_TASK_PENDING, &pool->sp_flags);

Took me a few mins to decide that splitting up the test_and_clear_bit
didn't open a ToC/ToU race. I think we're saved by the fact that only
nfsd thread itself clears the bit, so we're guaranteed not to race with
another clear (whew).
 
>  	rqstp->rq_xprt = svc_xprt_dequeue(pool);
>  	if (rqstp->rq_xprt) {
>  		trace_svc_pool_awoken(rqstp);
> @@ -761,6 +762,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
>  	percpu_counter_inc(&pool->sp_threads_no_work);
>  	return NULL;
>  out_found:
> +	clear_bit(SP_TASK_PENDING, &pool->sp_flags);
>  	/* Normally we will wait up to 5 seconds for any required
>  	 * cache information to be provided.
>  	 */

Reviewed-by: Jeff Layton <jlayton@kernel.org>
NeilBrown July 31, 2023, 10:05 p.m. UTC | #3
On Tue, 01 Aug 2023, Chuck Lever wrote:
> On Mon, Jul 31, 2023 at 04:48:28PM +1000, NeilBrown wrote:
> > Based on its name you would think that rqst_should_sleep() would be
> > read-only, not changing anything.  But in fact it will clear
> > SP_TASK_PENDING if that was set.  This is surprising, and it blurs the
> > line between "check for work to do" and "dequeue work to do".
> 
> I agree that rqst_should_sleep() sounds like it should be a
> predicate without side effects.
> 
> 
> > So change the "test_and_clear" to simple "test" and clear the bit once
> > the thread has decided to wake up and return to the caller.
> > 
> > With this, it makes sense to *always* set SP_TASK_PENDING when asked,
> > rather than only to set it if no thread could be woken up.
> 
> I'm lost here. Why does always setting TASK_PENDING now make sense?
> If there's no task pending, won't this trigger a wake up when there
> is nothing to do?

Clearly Jedi mind tricks don't work on you...  I'll have to try logic
instead.

 This separation of "test" and "clear" is a first step in re-organising
 the queueing of tasks around a clear pattern of "client queues a task",
 "service checks if any tasks are queued" and "service dequeues and
 performs a task".  The first step for TASK_PENDING doesn't quite follow
 a clear pattern as the flag is only set (the work is only queued) if
 no thread could be immediately woken.  This imposes on the
 implementation of the service.  For example, whenever a service is
 woken it *must* return to the caller of svc_recv(), just in case it was
 woken by svc_wake_up().  It cannot test if there is work to do, and if
 not - go back to sleep.  It provides a cleaner implementation of the
 pattern to *always* queue the work.  i.e. *always* set the flag.  Which
 ever thread first sees and clears the flag will return to caller and
 perform the required work.  If the woken thread doesn't find anything
 to do, it could go back to sleep (though currently it doesn't).

Is that more convincing?

Thanks,
NeilBrown


> 
> 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  net/sunrpc/svc_xprt.c | 8 +++++---
> >  1 file changed, 5 insertions(+), 3 deletions(-)
> > 
> > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> > index cd92cb54132d..380fb3caea4c 100644
> > --- a/net/sunrpc/svc_xprt.c
> > +++ b/net/sunrpc/svc_xprt.c
> > @@ -581,8 +581,8 @@ void svc_wake_up(struct svc_serv *serv)
> >  {
> >  	struct svc_pool *pool = &serv->sv_pools[0];
> >  
> > -	if (!svc_pool_wake_idle_thread(serv, pool))
> > -		set_bit(SP_TASK_PENDING, &pool->sp_flags);
> > +	set_bit(SP_TASK_PENDING, &pool->sp_flags);
> > +	svc_pool_wake_idle_thread(serv, pool);
> >  }
> >  EXPORT_SYMBOL_GPL(svc_wake_up);
> >  
> > @@ -704,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
> >  	struct svc_pool		*pool = rqstp->rq_pool;
> >  
> >  	/* did someone call svc_wake_up? */
> > -	if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
> > +	if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
> >  		return false;
> >  
> >  	/* was a socket queued? */
> > @@ -750,6 +750,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
> >  
> >  	set_bit(RQ_BUSY, &rqstp->rq_flags);
> >  	smp_mb__after_atomic();
> > +	clear_bit(SP_TASK_PENDING, &pool->sp_flags);
> 
> Why wouldn't this go before the smp_mb__after_atomic()?
> 
> 
> >  	rqstp->rq_xprt = svc_xprt_dequeue(pool);
> >  	if (rqstp->rq_xprt) {
> >  		trace_svc_pool_awoken(rqstp);
> > @@ -761,6 +762,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
> >  	percpu_counter_inc(&pool->sp_threads_no_work);
> >  	return NULL;
> >  out_found:
> > +	clear_bit(SP_TASK_PENDING, &pool->sp_flags);
> 
> clear_bit_unlock ?
> 
> >  	/* Normally we will wait up to 5 seconds for any required
> >  	 * cache information to be provided.
> >  	 */
> > -- 
> > 2.40.1
> > 
> 
> -- 
> Chuck Lever
>
Chuck Lever July 31, 2023, 10:31 p.m. UTC | #4
> On Jul 31, 2023, at 6:05 PM, NeilBrown <neilb@suse.de> wrote:
> 
> On Tue, 01 Aug 2023, Chuck Lever wrote:
>> On Mon, Jul 31, 2023 at 04:48:28PM +1000, NeilBrown wrote:
>>> Based on its name you would think that rqst_should_sleep() would be
>>> read-only, not changing anything.  But in fact it will clear
>>> SP_TASK_PENDING if that was set.  This is surprising, and it blurs the
>>> line between "check for work to do" and "dequeue work to do".
>> 
>> I agree that rqst_should_sleep() sounds like it should be a
>> predicate without side effects.
>> 
>> 
>>> So change the "test_and_clear" to simple "test" and clear the bit once
>>> the thread has decided to wake up and return to the caller.
>>> 
>>> With this, it makes sense to *always* set SP_TASK_PENDING when asked,
>>> rather than only to set it if no thread could be woken up.
>> 
>> I'm lost here. Why does always setting TASK_PENDING now make sense?
>> If there's no task pending, won't this trigger a wake up when there
>> is nothing to do?
> 
> Clearly Jedi mind tricks don't work on you...  I'll have to try logic
> instead.
> 
> This separation of "test" and "clear" is a first step in re-organising
> the queueing of tasks around a clear pattern of "client queues a task",
> "service checks if any tasks are queued" and "service dequeues and
> performs a task".  The first step for TASK_PENDING doesn't quite follow
> a clear pattern as the flag is only set (the work is only queued) if
> no thread could be immediately woken.  This imposes on the
> implementation of the service.  For example, whenever a service is
> woken it *must* return to the caller of svc_recv(), just in case it was
> woken by svc_wake_up().  It cannot test if there is work to do, and if
> not - go back to sleep.  It provides a cleaner implementation of the
> pattern to *always* queue the work.  i.e. *always* set the flag.  Which
> ever thread first sees and clears the flag will return to caller and
> perform the required work.  If the woken thread doesn't find anything
> to do, it could go back to sleep (though currently it doesn't).
> 
> Is that more convincing?

Well, that makes sense if TASK_PENDING means the same as XPT_BUSY
except it marks the whole pool busy instead of only a particular
transport.

We're always setting XPT_BUSY this way, for instance -- if there's
work to do, whether or not a wake-up was successful, this bit is
set.

The meaning of TASK_PENDING before this patch was "the wake-up
didn't succeed, so make sure to go back and look for the work".
Your patch changes that.

So, perhaps the patch description needs to document that
particular change in semantics rather than describing only the
code changes. Or we could rename the bit SP_BUSY or something a
little more self-explanatory.

(A more crazy idea would be to add a phony transport to each
pool that acts as the locus for these transportless tasks).

I do not object to the patch. It's just hard to square up the
current description with the patch itself. Speaking, of course,
as someone who will no doubt have to read this description
in a year or two and ask "what were we thinking?"


> Thanks,
> NeilBrown
> 
> 
>> 
>> 
>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>> ---
>>> net/sunrpc/svc_xprt.c | 8 +++++---
>>> 1 file changed, 5 insertions(+), 3 deletions(-)
>>> 
>>> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
>>> index cd92cb54132d..380fb3caea4c 100644
>>> --- a/net/sunrpc/svc_xprt.c
>>> +++ b/net/sunrpc/svc_xprt.c
>>> @@ -581,8 +581,8 @@ void svc_wake_up(struct svc_serv *serv)
>>> {
>>> struct svc_pool *pool = &serv->sv_pools[0];
>>> 
>>> - if (!svc_pool_wake_idle_thread(serv, pool))
>>> - set_bit(SP_TASK_PENDING, &pool->sp_flags);
>>> + set_bit(SP_TASK_PENDING, &pool->sp_flags);
>>> + svc_pool_wake_idle_thread(serv, pool);
>>> }
>>> EXPORT_SYMBOL_GPL(svc_wake_up);
>>> 
>>> @@ -704,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
>>> struct svc_pool *pool = rqstp->rq_pool;
>>> 
>>> /* did someone call svc_wake_up? */
>>> - if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
>>> + if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
>>> return false;
>>> 
>>> /* was a socket queued? */
>>> @@ -750,6 +750,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
>>> 
>>> set_bit(RQ_BUSY, &rqstp->rq_flags);
>>> smp_mb__after_atomic();
>>> + clear_bit(SP_TASK_PENDING, &pool->sp_flags);
>> 
>> Why wouldn't this go before the smp_mb__after_atomic()?
>> 
>> 
>>> rqstp->rq_xprt = svc_xprt_dequeue(pool);
>>> if (rqstp->rq_xprt) {
>>> trace_svc_pool_awoken(rqstp);
>>> @@ -761,6 +762,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
>>> percpu_counter_inc(&pool->sp_threads_no_work);
>>> return NULL;
>>> out_found:
>>> + clear_bit(SP_TASK_PENDING, &pool->sp_flags);
>> 
>> clear_bit_unlock ?
>> 
>>> /* Normally we will wait up to 5 seconds for any required
>>>  * cache information to be provided.
>>>  */
>>> -- 
>>> 2.40.1
>>> 
>> 
>> -- 
>> Chuck Lever


--
Chuck Lever
diff mbox series

Patch

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index cd92cb54132d..380fb3caea4c 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -581,8 +581,8 @@  void svc_wake_up(struct svc_serv *serv)
 {
 	struct svc_pool *pool = &serv->sv_pools[0];
 
-	if (!svc_pool_wake_idle_thread(serv, pool))
-		set_bit(SP_TASK_PENDING, &pool->sp_flags);
+	set_bit(SP_TASK_PENDING, &pool->sp_flags);
+	svc_pool_wake_idle_thread(serv, pool);
 }
 EXPORT_SYMBOL_GPL(svc_wake_up);
 
@@ -704,7 +704,7 @@  rqst_should_sleep(struct svc_rqst *rqstp)
 	struct svc_pool		*pool = rqstp->rq_pool;
 
 	/* did someone call svc_wake_up? */
-	if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
+	if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
 		return false;
 
 	/* was a socket queued? */
@@ -750,6 +750,7 @@  static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
 
 	set_bit(RQ_BUSY, &rqstp->rq_flags);
 	smp_mb__after_atomic();
+	clear_bit(SP_TASK_PENDING, &pool->sp_flags);
 	rqstp->rq_xprt = svc_xprt_dequeue(pool);
 	if (rqstp->rq_xprt) {
 		trace_svc_pool_awoken(rqstp);
@@ -761,6 +762,7 @@  static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
 	percpu_counter_inc(&pool->sp_threads_no_work);
 	return NULL;
 out_found:
+	clear_bit(SP_TASK_PENDING, &pool->sp_flags);
 	/* Normally we will wait up to 5 seconds for any required
 	 * cache information to be provided.
 	 */