diff mbox series

[RFC,6/8] SUNRPC: Replace sp_threads_all with an xarray

Message ID 168806418337.1034990.3706968041401141634.stgit@morisot.1015granger.net (mailing list archive)
State New, archived
Headers show
Series RPC service thread scheduler optimizations | expand

Commit Message

Chuck Lever June 29, 2023, 6:43 p.m. UTC
From: Chuck Lever <chuck.lever@oracle.com>

We want a thread lookup operation that can be done with RCU only,
but to avoid the linked-list walk, which does not scale well in the
number of svc threads.

BH-disabled locking is no longer necessary because we're no longer
sharing the pool's sp_lock to protect either the xarray or the
pool's thread count. sp_lock also protects transport activity. As
far as I can tell, there are no callers of svc_set_num_threads()
that run outside of process context.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfssvc.c              |    3 +-
 include/linux/sunrpc/svc.h    |   11 +++----
 include/trace/events/sunrpc.h |   47 +++++++++++++++++++++++++++++-
 net/sunrpc/svc.c              |   65 +++++++++++++++++++++++++----------------
 net/sunrpc/svc_xprt.c         |    2 +
 5 files changed, 92 insertions(+), 36 deletions(-)
diff mbox series

Patch

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2154fa63c5f2..d42b2a40c93c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -62,8 +62,7 @@  static __be32			nfsd_init_request(struct svc_rqst *,
  * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
  * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless
  * nn->keep_active is set).  That number of nfsd threads must
- * exist and each must be listed in ->sp_all_threads in some entry of
- * ->sv_pools[].
+ * exist and each must be listed in some entry of ->sv_pools[].
  *
  * Each active thread holds a counted reference on nn->nfsd_serv, as does
  * the nn->keep_active flag and various transient calls to svc_get().
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index fbfe6ea737c8..45aa7648dca6 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -32,10 +32,10 @@ 
  */
 struct svc_pool {
 	unsigned int		sp_id;	    	/* pool id; also node id on NUMA */
-	spinlock_t		sp_lock;	/* protects all fields */
+	spinlock_t		sp_lock;	/* protects sp_sockets */
 	struct list_head	sp_sockets;	/* pending sockets */
 	unsigned int		sp_nrthreads;	/* # of threads in pool */
-	struct list_head	sp_all_threads;	/* all server threads */
+	struct xarray		sp_thread_xa;
 
 	/* statistics on pool operation */
 	struct percpu_counter	sp_sockets_queued;
@@ -194,7 +194,6 @@  extern u32 svc_max_payload(const struct svc_rqst *rqstp);
  * processed.
  */
 struct svc_rqst {
-	struct list_head	rq_all;		/* all threads list */
 	struct rcu_head		rq_rcu_head;	/* for RCU deferred kfree */
 	struct svc_xprt *	rq_xprt;	/* transport ptr */
 
@@ -239,10 +238,10 @@  struct svc_rqst {
 #define	RQ_SPLICE_OK	(4)			/* turned off in gss privacy
 						 * to prevent encrypting page
 						 * cache pages */
-#define	RQ_VICTIM	(5)			/* about to be shut down */
-#define	RQ_BUSY		(6)			/* request is busy */
-#define	RQ_DATA		(7)			/* request has data */
+#define	RQ_BUSY		(5)			/* request is busy */
+#define	RQ_DATA		(6)			/* request has data */
 	unsigned long		rq_flags;	/* flags field */
+	u32			rq_thread_id;	/* xarray index */
 	ktime_t			rq_qtime;	/* enqueue time */
 
 	void *			rq_argp;	/* decoded arguments */
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 70f3bc22c429..4ec746048f15 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1600,7 +1600,6 @@  DEFINE_SVCXDRBUF_EVENT(sendto);
 	svc_rqst_flag(USEDEFERRAL)					\
 	svc_rqst_flag(DROPME)						\
 	svc_rqst_flag(SPLICE_OK)					\
-	svc_rqst_flag(VICTIM)						\
 	svc_rqst_flag(BUSY)						\
 	svc_rqst_flag_end(DATA)
 
@@ -2043,6 +2042,52 @@  TRACE_EVENT(svc_pool_starved,
 	)
 );
 
+DECLARE_EVENT_CLASS(svc_thread_lifetime_class,
+	TP_PROTO(
+		const struct svc_serv *serv,
+		const struct svc_pool *pool,
+		const struct svc_rqst *rqstp
+	),
+
+	TP_ARGS(serv, pool, rqstp),
+
+	TP_STRUCT__entry(
+		__string(name, serv->sv_name)
+		__field(int, pool_id)
+		__field(unsigned int, nrthreads)
+		__field(unsigned long, pool_flags)
+		__field(u32, thread_id)
+		__field(const void *, rqstp)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, serv->sv_name);
+		__entry->pool_id = pool->sp_id;
+		__entry->nrthreads = pool->sp_nrthreads;
+		__entry->pool_flags = pool->sp_flags;
+		__entry->thread_id = rqstp->rq_thread_id;
+		__entry->rqstp = rqstp;
+	),
+
+	TP_printk("service=%s pool=%d pool_flags=%s nrthreads=%u thread_id=%u",
+		__get_str(name), __entry->pool_id,
+		show_svc_pool_flags(__entry->pool_flags),
+		__entry->nrthreads, __entry->thread_id
+	)
+);
+
+#define DEFINE_SVC_THREAD_LIFETIME_EVENT(name) \
+	DEFINE_EVENT(svc_thread_lifetime_class, svc_pool_##name, \
+			TP_PROTO( \
+				const struct svc_serv *serv, \
+				const struct svc_pool *pool, \
+				const struct svc_rqst *rqstp \
+			), \
+			TP_ARGS(serv, pool, rqstp))
+
+DEFINE_SVC_THREAD_LIFETIME_EVENT(thread_init);
+DEFINE_SVC_THREAD_LIFETIME_EVENT(thread_exit);
+
 DECLARE_EVENT_CLASS(svc_xprt_event,
 	TP_PROTO(
 		const struct svc_xprt *xprt
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 828d28883ea8..18fbb98895ea 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -507,8 +507,8 @@  __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 
 		pool->sp_id = i;
 		INIT_LIST_HEAD(&pool->sp_sockets);
-		INIT_LIST_HEAD(&pool->sp_all_threads);
 		spin_lock_init(&pool->sp_lock);
+		xa_init_flags(&pool->sp_thread_xa, XA_FLAGS_ALLOC);
 
 		percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
 		percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
@@ -592,6 +592,8 @@  svc_destroy(struct kref *ref)
 		percpu_counter_destroy(&pool->sp_threads_woken);
 		percpu_counter_destroy(&pool->sp_threads_timedout);
 		percpu_counter_destroy(&pool->sp_threads_starved);
+
+		xa_destroy(&pool->sp_thread_xa);
 	}
 	kfree(serv->sv_pools);
 	kfree(serv);
@@ -672,7 +674,11 @@  EXPORT_SYMBOL_GPL(svc_rqst_alloc);
 static struct svc_rqst *
 svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 {
+	static const struct xa_limit limit = {
+		.max = UINT_MAX,
+	};
 	struct svc_rqst	*rqstp;
+	int ret;
 
 	rqstp = svc_rqst_alloc(serv, pool, node);
 	if (!rqstp)
@@ -683,11 +689,21 @@  svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	serv->sv_nrthreads += 1;
 	spin_unlock_bh(&serv->sv_lock);
 
-	spin_lock_bh(&pool->sp_lock);
+	xa_lock(&pool->sp_thread_xa);
+	ret = __xa_alloc(&pool->sp_thread_xa, &rqstp->rq_thread_id, rqstp,
+			 limit, GFP_KERNEL);
+	if (ret) {
+		xa_unlock(&pool->sp_thread_xa);
+		goto out_free;
+	}
 	pool->sp_nrthreads++;
-	list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
-	spin_unlock_bh(&pool->sp_lock);
+	xa_unlock(&pool->sp_thread_xa);
+	trace_svc_pool_thread_init(serv, pool, rqstp);
 	return rqstp;
+
+out_free:
+	svc_rqst_free(rqstp);
+	return ERR_PTR(ret);
 }
 
 /**
@@ -704,19 +720,17 @@  struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
 					   struct svc_pool *pool)
 {
 	struct svc_rqst	*rqstp;
+	unsigned long index;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+	xa_for_each(&pool->sp_thread_xa, index, rqstp) {
 		if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
 			continue;
 
-		rcu_read_unlock();
 		WRITE_ONCE(rqstp->rq_qtime, ktime_get());
 		wake_up_process(rqstp->rq_task);
 		percpu_counter_inc(&pool->sp_threads_woken);
 		return rqstp;
 	}
-	rcu_read_unlock();
 
 	trace_svc_pool_starved(serv, pool);
 	percpu_counter_inc(&pool->sp_threads_starved);
@@ -732,32 +746,31 @@  svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
 static struct task_struct *
 svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
 {
-	unsigned int i;
 	struct task_struct *task = NULL;
+	struct svc_rqst *rqstp;
+	unsigned long zero = 0;
+	unsigned int i;
 
 	if (pool != NULL) {
-		spin_lock_bh(&pool->sp_lock);
+		xa_lock(&pool->sp_thread_xa);
 	} else {
 		for (i = 0; i < serv->sv_nrpools; i++) {
 			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
-			spin_lock_bh(&pool->sp_lock);
-			if (!list_empty(&pool->sp_all_threads))
+			xa_lock(&pool->sp_thread_xa);
+			if (!xa_empty(&pool->sp_thread_xa))
 				goto found_pool;
-			spin_unlock_bh(&pool->sp_lock);
+			xa_unlock(&pool->sp_thread_xa);
 		}
 		return NULL;
 	}
 
 found_pool:
-	if (!list_empty(&pool->sp_all_threads)) {
-		struct svc_rqst *rqstp;
-
-		rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
-		set_bit(RQ_VICTIM, &rqstp->rq_flags);
-		list_del_rcu(&rqstp->rq_all);
+	rqstp = xa_find(&pool->sp_thread_xa, &zero, U32_MAX, XA_PRESENT);
+	if (rqstp) {
+		__xa_erase(&pool->sp_thread_xa, rqstp->rq_thread_id);
 		task = rqstp->rq_task;
 	}
-	spin_unlock_bh(&pool->sp_lock);
+	xa_unlock(&pool->sp_thread_xa);
 	return task;
 }
 
@@ -839,9 +852,9 @@  svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 	if (pool == NULL) {
 		nrservs -= serv->sv_nrthreads;
 	} else {
-		spin_lock_bh(&pool->sp_lock);
+		xa_lock(&pool->sp_thread_xa);
 		nrservs -= pool->sp_nrthreads;
-		spin_unlock_bh(&pool->sp_lock);
+		xa_unlock(&pool->sp_thread_xa);
 	}
 
 	if (nrservs > 0)
@@ -928,11 +941,11 @@  svc_exit_thread(struct svc_rqst *rqstp)
 	struct svc_serv	*serv = rqstp->rq_server;
 	struct svc_pool	*pool = rqstp->rq_pool;
 
-	spin_lock_bh(&pool->sp_lock);
+	xa_lock(&pool->sp_thread_xa);
 	pool->sp_nrthreads--;
-	if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
-		list_del_rcu(&rqstp->rq_all);
-	spin_unlock_bh(&pool->sp_lock);
+	__xa_erase(&pool->sp_thread_xa, rqstp->rq_thread_id);
+	xa_unlock(&pool->sp_thread_xa);
+	trace_svc_pool_thread_exit(serv, pool, rqstp);
 
 	spin_lock_bh(&serv->sv_lock);
 	serv->sv_nrthreads -= 1;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 7d5aed4d1766..77fc20b2181d 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -46,7 +46,7 @@  static LIST_HEAD(svc_xprt_class_list);
 
 /* SMP locking strategy:
  *
- *	svc_pool->sp_lock protects most of the fields of that pool.
+ *	svc_pool->sp_lock protects sp_sockets.
  *	svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
  *	when both need to be taken (rare), svc_serv->sv_lock is first.
  *	The "service mutex" protects svc_serv->sv_nrthread.