diff mbox series

[5/6] SUNRPC: add list of idle threads

Message ID 20230802073443.17965-6-neilb@suse.de (mailing list archive)
State New, archived
Headers show
Series SUNRPC: thread management improvements | expand

Commit Message

NeilBrown Aug. 2, 2023, 7:34 a.m. UTC
Rather than searching a list of threads to find an idle one, having a
list of idle threads allows an idle thread to be found immediately.

This adds some spin_lock calls which is not ideal, but as the hold-time
is tiny it is still faster than searching a list.  A future patch will
remove them using llist.h.  This involves some subtlety and so is left
to a separate patch.

This removes the need for the RQ_BUSY flag.  The rqst is "busy"
precisely when it is not on the "idle" list.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 include/linux/sunrpc/svc.h    | 25 ++++++++++++++++++++++++-
 include/trace/events/sunrpc.h |  1 -
 net/sunrpc/svc.c              | 13 ++++++++-----
 net/sunrpc/svc_xprt.c         | 15 +++++++++++----
 4 files changed, 43 insertions(+), 11 deletions(-)

Comments

Chuck Lever Aug. 14, 2023, 5:28 p.m. UTC | #1
On Wed, Aug 02, 2023 at 05:34:42PM +1000, NeilBrown wrote:
> Rather than searching a list of threads to find an idle one, having a
> list of idle threads allows an idle thread to be found immediately.
> 
> This adds some spin_lock calls which is not ideal, but as the hold-time
> is tiny it is still faster than searching a list.

Keep in mind that b1691bc03d4e ("sunrpc: convert to lockless lookup
of queued server threads") did the opposite because that very
spin_lock was highly contended. I am skeptical of the above claim
without lock_stat data... but that's sort of moot as this is a
temporary situation, as you point out next.


> A future patch will
> remove them using llist.h.  This involves some subtlety and so is left
> to a separate patch.

Since I haven't seen that patch yet, I'm reserving judgement about
whether and how these two changes might be merged.


> This removes the need for the RQ_BUSY flag.  The rqst is "busy"
> precisely when it is not on the "idle" list.

I've been having some trouble with this one. The server system
deadlocks hard as soon as the NFS server starts. I tracked it down
this morning: this patch never initialized the sp_idle_threads
list_head.

I will apply this patch (with one-line fix) and the patch that
removes SP_CONGESTED once I hear from the client folks on the
"integrate backchannel" patch.


> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  include/linux/sunrpc/svc.h    | 25 ++++++++++++++++++++++++-
>  include/trace/events/sunrpc.h |  1 -
>  net/sunrpc/svc.c              | 13 ++++++++-----
>  net/sunrpc/svc_xprt.c         | 15 +++++++++++----
>  4 files changed, 43 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 1ac6f74781aa..8b93af92dd53 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -37,6 +37,7 @@ struct svc_pool {
>  	struct list_head	sp_sockets;	/* pending sockets */
>  	unsigned int		sp_nrthreads;	/* # of threads in pool */
>  	struct list_head	sp_all_threads;	/* all server threads */
> +	struct list_head	sp_idle_threads; /* idle server threads */
>  
>  	/* statistics on pool operation */
>  	struct percpu_counter	sp_messages_arrived;
> @@ -186,6 +187,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
>   */
>  struct svc_rqst {
>  	struct list_head	rq_all;		/* all threads list */
> +	struct list_head	rq_idle;	/* On the idle list */
>  	struct rcu_head		rq_rcu_head;	/* for RCU deferred kfree */
>  	struct svc_xprt *	rq_xprt;	/* transport ptr */
>  
> @@ -262,10 +264,31 @@ enum {
>  	RQ_SPLICE_OK,		/* turned off in gss privacy to prevent
>  				 * encrypting page cache pages */
>  	RQ_VICTIM,		/* Have agreed to shut down */
> -	RQ_BUSY,		/* request is busy */
>  	RQ_DATA,		/* request has data */
>  };
>  
> +/**
> + * svc_thread_set_busy - mark a thread as busy
> + * @rqstp: the thread which is now busy
> + *
> + * If rq_idle is "empty", the thread must be busy.
> + */
> +static inline void svc_thread_set_busy(struct svc_rqst *rqstp)
> +{
> +	INIT_LIST_HEAD(&rqstp->rq_idle);
> +}
> +
> +/**
> + * svc_thread_busy - check if a thread is busy
> + * @rqstp: the thread which might be busy
> + *
> + * If rq_idle is "empty", the thread must be busy.
> + */
> +static inline bool svc_thread_busy(struct svc_rqst *rqstp)
> +{
> +	return list_empty(&rqstp->rq_idle);
> +}
> +
>  #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
>  
>  /*
> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> index 6beb38c1dcb5..337c90787fb1 100644
> --- a/include/trace/events/sunrpc.h
> +++ b/include/trace/events/sunrpc.h
> @@ -1677,7 +1677,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto);
>  	svc_rqst_flag(DROPME)						\
>  	svc_rqst_flag(SPLICE_OK)					\
>  	svc_rqst_flag(VICTIM)						\
> -	svc_rqst_flag(BUSY)						\
>  	svc_rqst_flag_end(DATA)
>  
>  #undef svc_rqst_flag
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 1233d72714b9..dce433dea1bd 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -641,7 +641,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
>  
>  	folio_batch_init(&rqstp->rq_fbatch);
>  
> -	__set_bit(RQ_BUSY, &rqstp->rq_flags);
> +	svc_thread_set_busy(rqstp);
>  	rqstp->rq_server = serv;
>  	rqstp->rq_pool = pool;
>  
> @@ -702,10 +702,13 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool)
>  	struct svc_rqst	*rqstp;
>  
>  	rcu_read_lock();
> -	list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
> -		if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
> -			continue;
> -
> +	spin_lock_bh(&pool->sp_lock);
> +	rqstp = list_first_entry_or_null(&pool->sp_idle_threads,
> +					 struct svc_rqst, rq_idle);
> +	if (rqstp)
> +		list_del_init(&rqstp->rq_idle);
> +	spin_unlock_bh(&pool->sp_lock);
> +	if (rqstp) {
>  		WRITE_ONCE(rqstp->rq_qtime, ktime_get());
>  		wake_up_process(rqstp->rq_task);
>  		rcu_read_unlock();
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 0a300ae6a7ed..e44efcc21b63 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -737,8 +737,9 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
>  		set_current_state(TASK_IDLE);
>  		smp_mb__before_atomic();
>  		clear_bit(SP_CONGESTED, &pool->sp_flags);
> -		clear_bit(RQ_BUSY, &rqstp->rq_flags);
> -		smp_mb__after_atomic();
> +		spin_lock_bh(&pool->sp_lock);
> +		list_add(&rqstp->rq_idle, &pool->sp_idle_threads);
> +		spin_unlock_bh(&pool->sp_lock);
>  
>  		/* Need to check should_sleep() again after
>  		 * setting task state in case a wakeup happened
> @@ -751,8 +752,14 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
>  			cond_resched();
>  		}
>  
> -		set_bit(RQ_BUSY, &rqstp->rq_flags);
> -		smp_mb__after_atomic();
> +		/* We *must* be removed from the list before we can continue.
> +		 * If we were woken, this is already done
> +		 */
> +		if (!svc_thread_busy(rqstp)) {
> +			spin_lock_bh(&pool->sp_lock);
> +			list_del_init(&rqstp->rq_idle);
> +			spin_unlock_bh(&pool->sp_lock);
> +		}
>  	} else
>  		cond_resched();
>  	try_to_freeze();
> -- 
> 2.40.1
>
NeilBrown Aug. 14, 2023, 9:32 p.m. UTC | #2
On Tue, 15 Aug 2023, Chuck Lever wrote:
> On Wed, Aug 02, 2023 at 05:34:42PM +1000, NeilBrown wrote:
> > Rather than searching a list of threads to find an idle one, having a
> > list of idle threads allows an idle thread to be found immediately.
> > 
> > This adds some spin_lock calls which is not ideal, but as the hold-time
> > is tiny it is still faster than searching a list.
> 
> Keep in mind that b1691bc03d4e ("sunrpc: convert to lockless lookup
> of queued server threads") did the opposite because that very
> spin_lock was highly contended. I am skeptical of the above claim
> without lock_stat data... but that's sort of moot as this is a
> temporary situation, as you point out next.

The old code did a lot more writes in the spin-locked region than this
code - so more hold-time.
But as you say - we would need data rather than speculation if this were
to be more than an interim state.

> 
> 
> > A future patch will
> > remove them using llist.h.  This involves some subtlety and so is left
> > to a separate patch.
> 
> Since I haven't seen that patch yet, I'm reserving judgement about
> whether and how these two changes might be merged.

I'll try to send the remainder of the series today.
> 
> 
> > This removes the need for the RQ_BUSY flag.  The rqst is "busy"
> > precisely when it is not on the "idle" list.
> 
> I've been having some trouble with this one. The server system
> deadlocks hard as soon as the NFS server starts. I tracked it down
> this morning: this patch never initialized the sp_idle_threads
> list_head.

Whoops.  Looks like I didn't test this particular intermediate state.

> 
> I will apply this patch (with one-line fix) and the patch that
> removes SP_CONGESTED once I hear from the client folks on the
> "integrate backchannel" patch.

Thanks,
NeilBrown
diff mbox series

Patch

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 1ac6f74781aa..8b93af92dd53 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -37,6 +37,7 @@  struct svc_pool {
 	struct list_head	sp_sockets;	/* pending sockets */
 	unsigned int		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
+	struct list_head	sp_idle_threads; /* idle server threads */
 
 	/* statistics on pool operation */
 	struct percpu_counter	sp_messages_arrived;
@@ -186,6 +187,7 @@  extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  */
 struct svc_rqst {
 	struct list_head	rq_all;		/* all threads list */
+	struct list_head	rq_idle;	/* On the idle list */
 	struct rcu_head		rq_rcu_head;	/* for RCU deferred kfree */
 	struct svc_xprt *	rq_xprt;	/* transport ptr */
 
@@ -262,10 +264,31 @@  enum {
 	RQ_SPLICE_OK,		/* turned off in gss privacy to prevent
 				 * encrypting page cache pages */
 	RQ_VICTIM,		/* Have agreed to shut down */
-	RQ_BUSY,		/* request is busy */
 	RQ_DATA,		/* request has data */
 };
 
+/**
+ * svc_thread_set_busy - mark a thread as busy
+ * @rqstp: the thread which is now busy
+ *
+ * If rq_idle is "empty", the thread must be busy.
+ */
+static inline void svc_thread_set_busy(struct svc_rqst *rqstp)
+{
+	INIT_LIST_HEAD(&rqstp->rq_idle);
+}
+
+/**
+ * svc_thread_busy - check if a thread is busy
+ * @rqstp: the thread which might be busy
+ *
+ * If rq_idle is "empty", the thread must be busy.
+ */
+static inline bool svc_thread_busy(struct svc_rqst *rqstp)
+{
+	return list_empty(&rqstp->rq_idle);
+}
+
 #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
 
 /*
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 6beb38c1dcb5..337c90787fb1 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1677,7 +1677,6 @@  DEFINE_SVCXDRBUF_EVENT(sendto);
 	svc_rqst_flag(DROPME)						\
 	svc_rqst_flag(SPLICE_OK)					\
 	svc_rqst_flag(VICTIM)						\
-	svc_rqst_flag(BUSY)						\
 	svc_rqst_flag_end(DATA)
 
 #undef svc_rqst_flag
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 1233d72714b9..dce433dea1bd 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -641,7 +641,7 @@  svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
 
 	folio_batch_init(&rqstp->rq_fbatch);
 
-	__set_bit(RQ_BUSY, &rqstp->rq_flags);
+	svc_thread_set_busy(rqstp);
 	rqstp->rq_server = serv;
 	rqstp->rq_pool = pool;
 
@@ -702,10 +702,13 @@  void svc_pool_wake_idle_thread(struct svc_pool *pool)
 	struct svc_rqst	*rqstp;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
-		if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
-			continue;
-
+	spin_lock_bh(&pool->sp_lock);
+	rqstp = list_first_entry_or_null(&pool->sp_idle_threads,
+					 struct svc_rqst, rq_idle);
+	if (rqstp)
+		list_del_init(&rqstp->rq_idle);
+	spin_unlock_bh(&pool->sp_lock);
+	if (rqstp) {
 		WRITE_ONCE(rqstp->rq_qtime, ktime_get());
 		wake_up_process(rqstp->rq_task);
 		rcu_read_unlock();
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 0a300ae6a7ed..e44efcc21b63 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -737,8 +737,9 @@  static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
 		set_current_state(TASK_IDLE);
 		smp_mb__before_atomic();
 		clear_bit(SP_CONGESTED, &pool->sp_flags);
-		clear_bit(RQ_BUSY, &rqstp->rq_flags);
-		smp_mb__after_atomic();
+		spin_lock_bh(&pool->sp_lock);
+		list_add(&rqstp->rq_idle, &pool->sp_idle_threads);
+		spin_unlock_bh(&pool->sp_lock);
 
 		/* Need to check should_sleep() again after
 		 * setting task state in case a wakeup happened
@@ -751,8 +752,14 @@  static void svc_rqst_wait_for_work(struct svc_rqst *rqstp)
 			cond_resched();
 		}
 
-		set_bit(RQ_BUSY, &rqstp->rq_flags);
-		smp_mb__after_atomic();
+		/* We *must* be removed from the list before we can continue.
+		 * If we were woken, this is already done
+		 */
+		if (!svc_thread_busy(rqstp)) {
+			spin_lock_bh(&pool->sp_lock);
+			list_del_init(&rqstp->rq_idle);
+			spin_unlock_bh(&pool->sp_lock);
+		}
 	} else
 		cond_resched();
 	try_to_freeze();