@@ -62,8 +62,7 @@ static __be32 nfsd_init_request(struct svc_rqst *,
* If (outside the lock) nn->nfsd_serv is non-NULL, then it must point to a
* properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless
* nn->keep_active is set). That number of nfsd threads must
- * exist and each must be listed in ->sp_all_threads in some entry of
- * ->sv_pools[].
+ * exist and each must be listed in some entry of ->sv_pools[].
*
* Each active thread holds a counted reference on nn->nfsd_serv, as does
* the nn->keep_active flag and various transient calls to svc_get().
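
The invariant above is unchanged by this patch: each pool's threads are now
indexed in sp_thread_xa instead of being linked through ->sp_all_threads, but
the per-pool sp_nrthreads counts must still account for every nfsd thread. A
minimal sketch of checking that bookkeeping (the helper name is hypothetical
and assumes <linux/sunrpc/svc.h>):

    static unsigned int count_pool_threads(const struct svc_serv *serv)
    {
            unsigned int i, total = 0;

            /* Sum the per-pool counts; the total should equal
             * serv->sv_nrthreads whenever the service is quiescent.
             */
            for (i = 0; i < serv->sv_nrpools; i++)
                    total += serv->sv_pools[i].sp_nrthreads;
            return total;
    }
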
@@ -32,10 +32,10 @@
*/
struct svc_pool {
unsigned int sp_id; /* pool id; also node id on NUMA */
- spinlock_t sp_lock; /* protects all fields */
+ spinlock_t sp_lock; /* protects sp_sockets */
struct list_head sp_sockets; /* pending sockets */
unsigned int sp_nrthreads; /* # of threads in pool */
- struct list_head sp_all_threads; /* all server threads */
+ struct xarray sp_thread_xa;
/* statistics on pool operation */
struct percpu_counter sp_sockets_queued;
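
An xarray carries its own internal spinlock, which is why sp_lock's coverage
shrinks to sp_sockets alone. XA_FLAGS_ALLOC (set at initialization, below)
turns on ID allocation, so each thread receives a small, reusable u32 index.
A self-contained sketch of that pattern, separate from the sunrpc structures
(all names here are illustrative):

    #include <linux/xarray.h>

    static DEFINE_XARRAY_ALLOC(example_xa);   /* implies XA_FLAGS_ALLOC */

    /* Store @entry at the lowest free index in [0, U32_MAX]. */
    static int example_track(void *entry, u32 *out_id)
    {
            return xa_alloc(&example_xa, out_id, entry, xa_limit_32b,
                            GFP_KERNEL);
    }

    static void example_untrack(u32 id)
    {
            xa_erase(&example_xa, id);   /* takes the xarray's own lock */
    }
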
@@ -194,7 +194,6 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
* processed.
*/
struct svc_rqst {
- struct list_head rq_all; /* all threads list */
struct rcu_head rq_rcu_head; /* for RCU deferred kfree */
struct svc_xprt * rq_xprt; /* transport ptr */
@@ -239,10 +238,10 @@ struct svc_rqst {
#define RQ_SPLICE_OK (4) /* turned off in gss privacy
* to prevent encrypting page
* cache pages */
-#define RQ_VICTIM (5) /* about to be shut down */
-#define RQ_BUSY (6) /* request is busy */
-#define RQ_DATA (7) /* request has data */
+#define RQ_BUSY (5) /* request is busy */
+#define RQ_DATA (6) /* request has data */
unsigned long rq_flags; /* flags field */
+ u32 rq_thread_id; /* xarray index */
ktime_t rq_qtime; /* enqueue time */
void * rq_argp; /* decoded arguments */
@@ -1600,7 +1600,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto);
svc_rqst_flag(USEDEFERRAL) \
svc_rqst_flag(DROPME) \
svc_rqst_flag(SPLICE_OK) \
- svc_rqst_flag(VICTIM) \
svc_rqst_flag(BUSY) \
svc_rqst_flag_end(DATA)
@@ -2043,6 +2042,52 @@ TRACE_EVENT(svc_pool_starved,
)
);
+DECLARE_EVENT_CLASS(svc_thread_lifetime_class,
+ TP_PROTO(
+ const struct svc_serv *serv,
+ const struct svc_pool *pool,
+ const struct svc_rqst *rqstp
+ ),
+
+ TP_ARGS(serv, pool, rqstp),
+
+ TP_STRUCT__entry(
+ __string(name, serv->sv_name)
+ __field(int, pool_id)
+ __field(unsigned int, nrthreads)
+ __field(unsigned long, pool_flags)
+ __field(u32, thread_id)
+ __field(const void *, rqstp)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, serv->sv_name);
+ __entry->pool_id = pool->sp_id;
+ __entry->nrthreads = pool->sp_nrthreads;
+ __entry->pool_flags = pool->sp_flags;
+ __entry->thread_id = rqstp->rq_thread_id;
+ __entry->rqstp = rqstp;
+ ),
+
+ TP_printk("service=%s pool=%d pool_flags=%s nrthreads=%u thread_id=%u",
+ __get_str(name), __entry->pool_id,
+ show_svc_pool_flags(__entry->pool_flags),
+ __entry->nrthreads, __entry->thread_id
+ )
+);
+
+#define DEFINE_SVC_THREAD_LIFETIME_EVENT(name) \
+ DEFINE_EVENT(svc_thread_lifetime_class, svc_pool_##name, \
+ TP_PROTO( \
+ const struct svc_serv *serv, \
+ const struct svc_pool *pool, \
+ const struct svc_rqst *rqstp \
+ ), \
+ TP_ARGS(serv, pool, rqstp))
+
+DEFINE_SVC_THREAD_LIFETIME_EVENT(thread_init);
+DEFINE_SVC_THREAD_LIFETIME_EVENT(thread_exit);
+
DECLARE_EVENT_CLASS(svc_xprt_event,
TP_PROTO(
const struct svc_xprt *xprt
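
Once built in, the two new tracepoints should show up alongside the existing
sunrpc events; for example, with a standard tracefs mount, writing 1 to
/sys/kernel/tracing/events/sunrpc/svc_pool_thread_init/enable would record
each thread's pool id, thread id, and pool flags at creation time.
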
@@ -507,8 +507,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
pool->sp_id = i;
INIT_LIST_HEAD(&pool->sp_sockets);
- INIT_LIST_HEAD(&pool->sp_all_threads);
spin_lock_init(&pool->sp_lock);
+ xa_init_flags(&pool->sp_thread_xa, XA_FLAGS_ALLOC);
percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
@@ -592,6 +592,8 @@ svc_destroy(struct kref *ref)
percpu_counter_destroy(&pool->sp_threads_woken);
percpu_counter_destroy(&pool->sp_threads_timedout);
percpu_counter_destroy(&pool->sp_threads_starved);
+
+ xa_destroy(&pool->sp_thread_xa);
}
kfree(serv->sv_pools);
kfree(serv);
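
A note on ordering: by the time svc_destroy() runs, every thread has already
erased itself from sp_thread_xa in svc_exit_thread(), so the array is empty
here. xa_destroy() releases the xarray's interior nodes; it never frees the
objects that entries pointed to, so no svc_rqst is touched by this call.
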
@@ -672,7 +674,11 @@ EXPORT_SYMBOL_GPL(svc_rqst_alloc);
static struct svc_rqst *
svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
{
+ static const struct xa_limit limit = {
+ .max = UINT_MAX,
+ };
struct svc_rqst *rqstp;
+ int ret;
rqstp = svc_rqst_alloc(serv, pool, node);
if (!rqstp)
@@ -683,11 +689,21 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
serv->sv_nrthreads += 1;
spin_unlock_bh(&serv->sv_lock);
- spin_lock_bh(&pool->sp_lock);
+ xa_lock(&pool->sp_thread_xa);
+ ret = __xa_alloc(&pool->sp_thread_xa, &rqstp->rq_thread_id, rqstp,
+ limit, GFP_KERNEL);
+ if (ret) {
+ xa_unlock(&pool->sp_thread_xa);
+ goto out_free;
+ }
pool->sp_nrthreads++;
- list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
- spin_unlock_bh(&pool->sp_lock);
+ xa_unlock(&pool->sp_thread_xa);
+ trace_svc_pool_thread_init(serv, pool, rqstp);
return rqstp;
+
+out_free:
+ svc_rqst_free(rqstp);
+ return ERR_PTR(ret);
}
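
Holding xa_lock() across __xa_alloc(..., GFP_KERNEL) is deliberate and safe:
when the xarray needs a new interior node, the allocator drops and retakes
the lock around the sleeping allocation. Using the locked form (rather than
plain xa_alloc()) keeps the sp_nrthreads update atomic with the insertion.
For contrast, a hypothetical unlocked variant (a sketch, not the patch's
code) would open a window where the two disagree:

    /* Illustrative only: with xa_alloc(), another CPU could observe
     * the new entry in sp_thread_xa before sp_nrthreads is bumped.
     */
    static int example_insert(struct svc_pool *pool, struct svc_rqst *rqstp)
    {
            static const struct xa_limit limit = {
                    .max = UINT_MAX,
            };
            int ret;

            ret = xa_alloc(&pool->sp_thread_xa, &rqstp->rq_thread_id,
                           rqstp, limit, GFP_KERNEL);
            if (ret)
                    return ret;
            pool->sp_nrthreads++;   /* no longer atomic with the insert */
            return 0;
    }
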
/**
@@ -704,19 +720,17 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
struct svc_pool *pool)
{
struct svc_rqst *rqstp;
+ unsigned long index;
- rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+ xa_for_each(&pool->sp_thread_xa, index, rqstp) {
if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
- rcu_read_unlock();
WRITE_ONCE(rqstp->rq_qtime, ktime_get());
wake_up_process(rqstp->rq_task);
percpu_counter_inc(&pool->sp_threads_woken);
return rqstp;
}
- rcu_read_unlock();
trace_svc_pool_starved(serv, pool);
percpu_counter_inc(&pool->sp_threads_starved);
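
Unlike the RCU list walk it replaces, this loop needs no explicit
rcu_read_lock(): xa_for_each() is built on xa_find()/xa_find_after(), which
take the RCU read lock internally. Concurrent insertion and erasure are
tolerated (a thread added behind the cursor may simply be missed), which
matches the best-effort semantics of the old list walk. The same pattern as
a standalone sketch (the helper is hypothetical):

    /* Count threads currently marked busy; a read-only walk needs
     * neither sp_lock nor an explicit RCU read-side section.
     */
    static unsigned int example_count_busy(struct svc_pool *pool)
    {
            struct svc_rqst *rqstp;
            unsigned long index;
            unsigned int busy = 0;

            xa_for_each(&pool->sp_thread_xa, index, rqstp)
                    if (test_bit(RQ_BUSY, &rqstp->rq_flags))
                            busy++;
            return busy;
    }
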
@@ -732,32 +746,31 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
static struct task_struct *
svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
- unsigned int i;
struct task_struct *task = NULL;
+ struct svc_rqst *rqstp;
+ unsigned long zero = 0;
+ unsigned int i;
if (pool != NULL) {
- spin_lock_bh(&pool->sp_lock);
+ xa_lock(&pool->sp_thread_xa);
} else {
for (i = 0; i < serv->sv_nrpools; i++) {
pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
- spin_lock_bh(&pool->sp_lock);
- if (!list_empty(&pool->sp_all_threads))
+ xa_lock(&pool->sp_thread_xa);
+ if (!xa_empty(&pool->sp_thread_xa))
goto found_pool;
- spin_unlock_bh(&pool->sp_lock);
+ xa_unlock(&pool->sp_thread_xa);
}
return NULL;
}
found_pool:
- if (!list_empty(&pool->sp_all_threads)) {
- struct svc_rqst *rqstp;
-
- rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
- set_bit(RQ_VICTIM, &rqstp->rq_flags);
- list_del_rcu(&rqstp->rq_all);
+ rqstp = xa_find(&pool->sp_thread_xa, &zero, U32_MAX, XA_PRESENT);
+ if (rqstp) {
+ __xa_erase(&pool->sp_thread_xa, rqstp->rq_thread_id);
task = rqstp->rq_task;
}
- spin_unlock_bh(&pool->sp_lock);
+ xa_unlock(&pool->sp_thread_xa);
return task;
}
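
This hunk is what makes RQ_VICTIM redundant: erasing the chosen thread from
sp_thread_xa immediately hides it from svc_pool_wake_idle_thread(), which is
exactly the state the flag used to signal. svc_exit_thread() can therefore
erase by rq_thread_id unconditionally instead of testing RQ_VICTIM first.
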
@@ -839,9 +852,9 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
if (pool == NULL) {
nrservs -= serv->sv_nrthreads;
} else {
- spin_lock_bh(&pool->sp_lock);
+ xa_lock(&pool->sp_thread_xa);
nrservs -= pool->sp_nrthreads;
- spin_unlock_bh(&pool->sp_lock);
+ xa_unlock(&pool->sp_thread_xa);
}
if (nrservs > 0)
@@ -928,11 +941,11 @@ svc_exit_thread(struct svc_rqst *rqstp)
struct svc_serv *serv = rqstp->rq_server;
struct svc_pool *pool = rqstp->rq_pool;
- spin_lock_bh(&pool->sp_lock);
+ xa_lock(&pool->sp_thread_xa);
pool->sp_nrthreads--;
- if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
- list_del_rcu(&rqstp->rq_all);
- spin_unlock_bh(&pool->sp_lock);
+ __xa_erase(&pool->sp_thread_xa, rqstp->rq_thread_id);
+ xa_unlock(&pool->sp_thread_xa);
+ trace_svc_pool_thread_exit(serv, pool, rqstp);
spin_lock_bh(&serv->sv_lock);
serv->sv_nrthreads -= 1;
@@ -46,7 +46,7 @@ static LIST_HEAD(svc_xprt_class_list);
/* SMP locking strategy:
*
- * svc_pool->sp_lock protects most of the fields of that pool.
+ * svc_pool->sp_lock protects sp_sockets.
* svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
* when both need to be taken (rare), svc_serv->sv_lock is first.
* The "service mutex" protects svc_serv->sv_nrthread.