Message ID | 20230802073443.17965-6-neilb@suse.de
---|---
State | New, archived
Series | SUNRPC: thread management improvements
On Wed, Aug 02, 2023 at 05:34:42PM +1000, NeilBrown wrote:
> Rather than searching a list of threads to find an idle one, having a
> list of idle threads allows an idle thread to be found immediately.
>
> This adds some spin_lock calls which is not ideal, but as the hold-time
> is tiny it is still faster than searching a list.

Keep in mind that b1691bc03d4e ("sunrpc: convert to lockless lookup
of queued server threads") did the opposite because that very
spin_lock was highly contended. I am skeptical of the above claim
without lock_stat data... but that's sort of moot as this is a
temporary situation, as you point out next.

> A future patch will
> remove them using llist.h. This involves some subtlety and so is left
> to a separate patch.

Since I haven't seen that patch yet, I'm reserving judgement about
whether and how these two changes might be merged.

> This removes the need for the RQ_BUSY flag. The rqst is "busy"
> precisely when it is not on the "idle" list.

I've been having some trouble with this one. The server system
deadlocks hard as soon as the NFS server starts. I tracked it down
this morning: this patch never initialized the sp_idle_threads
list_head.

I will apply this patch (with one-line fix) and the patch that
removes SP_CONGESTED once I hear from the client folks on the
"integrate backchannel" patch.

> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
> [... diffstat and patch body snipped; the full diff appears below ...]
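Chuck doesn't show the one-line fix itself. Given the symptom, it is presumably the matching `INIT_LIST_HEAD()` in `__svc_create()` in net/sunrpc/svc.c, where the pool's other list heads are set up. A sketch under that assumption (not the patch that was actually applied):

```c
/* net/sunrpc/svc.c, __svc_create() -- sketch of the presumed one-line fix.
 * The pool array is zero-filled on allocation, and a zero-filled list_head
 * is not a valid empty list, so sp_idle_threads must be initialized before
 * the first list_add()/list_del_init() touches it.
 */
for (i = 0; i < serv->sv_nrpools; i++) {
	struct svc_pool *pool = &serv->sv_pools[i];

	INIT_LIST_HEAD(&pool->sp_sockets);
	INIT_LIST_HEAD(&pool->sp_all_threads);
	INIT_LIST_HEAD(&pool->sp_idle_threads);	/* <-- the missing line */
	spin_lock_init(&pool->sp_lock);
}
```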
On Tue, 15 Aug 2023, Chuck Lever wrote:
> On Wed, Aug 02, 2023 at 05:34:42PM +1000, NeilBrown wrote:
> > Rather than searching a list of threads to find an idle one, having a
> > list of idle threads allows an idle thread to be found immediately.
> >
> > This adds some spin_lock calls which is not ideal, but as the hold-time
> > is tiny it is still faster than searching a list.
>
> Keep in mind that b1691bc03d4e ("sunrpc: convert to lockless lookup
> of queued server threads") did the opposite because that very
> spin_lock was highly contended. I am skeptical of the above claim
> without lock_stat data... but that's sort of moot as this is a
> temporary situation, as you point out next.

The old code did a lot more writes in the spin-locked region than this
code does, so the hold-time was longer. But as you say, we would need
data rather than speculation if this were to be more than an interim
state.

> > A future patch will
> > remove them using llist.h. This involves some subtlety and so is left
> > to a separate patch.
>
> Since I haven't seen that patch yet, I'm reserving judgement about
> whether and how these two changes might be merged.

I'll try to send the remainder of the series today.

> > This removes the need for the RQ_BUSY flag. The rqst is "busy"
> > precisely when it is not on the "idle" list.
>
> I've been having some trouble with this one. The server system
> deadlocks hard as soon as the NFS server starts. I tracked it down
> this morning: this patch never initialized the sp_idle_threads
> list_head.

Whoops. Looks like I didn't test this particular intermediate state.

> I will apply this patch (with one-line fix) and the patch that
> removes SP_CONGESTED once I hear from the client folks on the
> "integrate backchannel" patch.

Thanks,
NeilBrown
```diff
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 1ac6f74781aa..8b93af92dd53 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -37,6 +37,7 @@ struct svc_pool {
 	struct list_head	sp_sockets;	/* pending sockets */
 	unsigned int		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
+	struct list_head	sp_idle_threads; /* idle server threads */
 
 	/* statistics on pool operation */
 	struct percpu_counter	sp_messages_arrived;
@@ -186,6 +187,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  */
 struct svc_rqst {
 	struct list_head	rq_all;		/* all threads list */
+	struct list_head	rq_idle;	/* On the idle list */
 	struct rcu_head		rq_rcu_head;	/* for RCU deferred kfree */
 	struct svc_xprt *	rq_xprt;	/* transport ptr */
 
@@ -262,10 +264,31 @@ enum {
 	RQ_SPLICE_OK,		/* turned off in gss privacy to prevent
 				 * encrypting page cache pages */
 	RQ_VICTIM,		/* Have agreed to shut down */
-	RQ_BUSY,		/* request is busy */
 	RQ_DATA,		/* request has data */
 };
 
+/**
+ * svc_thread_set_busy - mark a thread as busy
+ * @rqstp: the thread which is now busy
+ *
+ * If rq_idle is "empty", the thread must be busy.
+ */
+static inline void svc_thread_set_busy(struct svc_rqst *rqstp)
+{
+	INIT_LIST_HEAD(&rqstp->rq_idle);
+}
+
+/**
+ * svc_thread_busy - check if a thread is busy
+ * @rqstp: the thread which might be busy
+ *
+ * If rq_idle is "empty", the thread must be busy.
+ */
+static inline bool svc_thread_busy(struct svc_rqst *rqstp)
+{
+	return list_empty(&rqstp->rq_idle);
+}
+
 #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
 
 /*
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 6beb38c1dcb5..337c90787fb1 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1677,7 +1677,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto);
 	svc_rqst_flag(DROPME)						\
 	svc_rqst_flag(SPLICE_OK)					\
 	svc_rqst_flag(VICTIM)						\
-	svc_rqst_flag(BUSY)						\
 	svc_rqst_flag_end(DATA)
 
 #undef svc_rqst_flag
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 1233d72714b9..dce433dea1bd 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -641,7 +641,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
 
 	folio_batch_init(&rqstp->rq_fbatch);
 
-	__set_bit(RQ_BUSY, &rqstp->rq_flags);
+	svc_thread_set_busy(rqstp);
 	rqstp->rq_server = serv;
 	rqstp->rq_pool = pool;
 
@@ -702,10 +702,13 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool)
 	struct svc_rqst	*rqstp;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
-		if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
-			continue;
-
+	spin_lock_bh(&pool->sp_lock);
+	rqstp = list_first_entry_or_null(&pool->sp_idle_threads,
+					 struct svc_rqst, rq_idle);
+	if (rqstp)
+		list_del_init(&rqstp->rq_idle);
+	spin_unlock_bh(&pool->sp_lock);
+	if (rqstp) {
 		WRITE_ONCE(rqstp->rq_qtime, ktime_get());
 		wake_up_process(rqstp->rq_task);
 		rcu_read_unlock();
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 0a300ae6a7ed..e44efcc21b63 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -737,8 +737,9 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
 		set_current_state(TASK_IDLE);
 		smp_mb__before_atomic();
 		clear_bit(SP_CONGESTED, &pool->sp_flags);
-		clear_bit(RQ_BUSY, &rqstp->rq_flags);
-		smp_mb__after_atomic();
+		spin_lock_bh(&pool->sp_lock);
+		list_add(&rqstp->rq_idle, &pool->sp_idle_threads);
+		spin_unlock_bh(&pool->sp_lock);
 
 		/* Need to check should_sleep() again after
 		 * setting task state in case a wakeup happened
@@ -751,8 +752,14 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
 			cond_resched();
 		}
 
-		set_bit(RQ_BUSY, &rqstp->rq_flags);
-		smp_mb__after_atomic();
+		/* We *must* be removed from the list before we can continue.
+		 * If we were woken, this is already done.
+		 */
+		if (!svc_thread_busy(rqstp)) {
+			spin_lock_bh(&pool->sp_lock);
+			list_del_init(&rqstp->rq_idle);
+			spin_unlock_bh(&pool->sp_lock);
+		}
 	} else
 		cond_resched();
 	try_to_freeze();
```
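The svc_thread_busy() test leans on a standard <linux/list.h> property: both INIT_LIST_HEAD() and list_del_init() leave the entry self-linked, so list_empty() applied to the entry itself reports whether it is currently linked anywhere. A minimal illustration of the idle/busy invariant (not part of the patch):

```c
/* Illustration only: a self-linked rq_idle means "busy" (not on the
 * idle list); a linked rq_idle means "idle".
 */
svc_thread_set_busy(rqstp);		/* INIT_LIST_HEAD(): self-linked */
WARN_ON(!svc_thread_busy(rqstp));	/* detached => busy */

spin_lock_bh(&pool->sp_lock);
list_add(&rqstp->rq_idle, &pool->sp_idle_threads);
spin_unlock_bh(&pool->sp_lock);
WARN_ON(svc_thread_busy(rqstp));	/* linked => idle */

spin_lock_bh(&pool->sp_lock);
list_del_init(&rqstp->rq_idle);		/* re-inits the entry */
spin_unlock_bh(&pool->sp_lock);
WARN_ON(!svc_thread_busy(rqstp));	/* self-linked again => busy */
```

This is also why the patch uses list_del_init() everywhere rather than plain list_del(): list_del() poisons the entry's pointers, so list_empty() on the entry would no longer be a meaningful busy test.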
Rather than searching a list of threads to find an idle one, having a
list of idle threads allows an idle thread to be found immediately.

This adds some spin_lock calls which is not ideal, but as the hold-time
is tiny it is still faster than searching a list. A future patch will
remove them using llist.h. This involves some subtlety and so is left
to a separate patch.

This removes the need for the RQ_BUSY flag. The rqst is "busy"
precisely when it is not on the "idle" list.

```
Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/sunrpc/svc.h    | 25 ++++++++++++++++++++++++-
 include/trace/events/sunrpc.h |  1 -
 net/sunrpc/svc.c              | 13 ++++++++-----
 net/sunrpc/svc_xprt.c         | 15 +++++++++++----
 4 files changed, 43 insertions(+), 11 deletions(-)
```
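The llist-based follow-up is not shown in this thread, but a rough sketch of the direction is possible from what llist.h provides: llist_add() is a lock-free push, and llist_del_first() is lock-free against producers but requires consumers to serialize among themselves. All names below (svc_pool_ll, sp_idle_list, rq_idle_node, the helpers) are invented for illustration; this is not code from the series.

```c
#include <linux/llist.h>

/* Hypothetical lock-free idle stack -- a sketch of the stated direction,
 * not the posted follow-up patch. Field and type names are invented.
 */
struct svc_pool_ll {
	struct llist_head	sp_idle_list;	/* lock-free LIFO of idle threads */
};

struct svc_rqst_ll {
	struct llist_node	rq_idle_node;
};

/* Going idle: llist_add() is a cmpxchg loop, no spinlock needed. */
static void svc_ll_go_idle(struct svc_pool_ll *pool, struct svc_rqst_ll *rqstp)
{
	llist_add(&rqstp->rq_idle_node, &pool->sp_idle_list);
}

/* Waking one thread: llist_del_first() needs no lock against producers,
 * but concurrent consumers must serialize with each other -- one piece
 * of the "subtlety" the changelog mentions.
 */
static struct svc_rqst_ll *svc_ll_pop_idle(struct svc_pool_ll *pool)
{
	struct llist_node *node = llist_del_first(&pool->sp_idle_list);

	return node ? llist_entry(node, struct svc_rqst_ll, rq_idle_node) : NULL;
}
```

The other hard case is a thread that stops waiting without being woken (signal, shutdown, timeout): it sits somewhere in the middle of the llist and cannot remove itself lock-free, so that path needs handling the spinlock version above gets for free with list_del_init().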