From patchwork Tue Jul 4 00:07:36 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13300545 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C5EF5EB64DD for ; Tue, 4 Jul 2023 00:07:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230144AbjGDAHj (ORCPT ); Mon, 3 Jul 2023 20:07:39 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56174 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229504AbjGDAHj (ORCPT ); Mon, 3 Jul 2023 20:07:39 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 761F2189 for ; Mon, 3 Jul 2023 17:07:38 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 14D2F61089 for ; Tue, 4 Jul 2023 00:07:38 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 1184BC433C9; Tue, 4 Jul 2023 00:07:37 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1688429257; bh=9HoA3ayuqif/MRiyu04L0DGS2/ZRjy2hHxko7SMSdrA=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=lGUlPFR2kZ977TFgFZwbyd3uW2wyCpuJxkUi16LTpyqddD5JxWTnEYuXiqw9RJUYh u7+c17hCABU+3fxnMEsJjjXjDlCI8d3Wi30EbnwnJUcCQRDSCzBvZ1Tw36G49gcvsU ouUeTrjg5stMQ+OfCaJsKjzXEGlISViJOZf9qBl6BM6TOLdVjsuyOzVolfn5heeXjV f9HHuf5e0/ZTopPYnivlceMxfyKM+0uB4hqCf+KYuRsdkeMFPjiEauKt6ORk6Tvekl m8f4ixwHOL/Mfup54+jmjsPaDsO5+0mkclyBZwi43OO+ek08bx4uphPS2/75KeRceJ 8dJOUUDTXZ64A== Subject: [PATCH 
v2 1/9] SUNRPC: Deduplicate thread wake-up code From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 03 Jul 2023 20:07:36 -0400 Message-ID: <168842925607.139194.5466712124437233518.stgit@manet.1015granger.net> In-Reply-To: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> References: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Refactor: Extract the loop that finds an idle service thread from svc_xprt_enqueue() and svc_wake_up(). Both functions do just about the same thing. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 28 ++++++++++++++++++++++++++ net/sunrpc/svc_xprt.c | 48 +++++++++++++++----------------------------- 3 files changed, 45 insertions(+), 32 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f8751118c122..dc2d90a655e2 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -427,6 +427,7 @@ int svc_register(const struct svc_serv *, struct net *, const int, void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool); struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); const char * svc_proc_name(const struct svc_rqst *rqstp); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 587811a002c9..e81ce5f76abd 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -689,6 +689,34 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) return rqstp; } +/** + * svc_pool_wake_idle_thread - wake an idle thread in @pool + * @pool: service thread pool + * + * Returns an idle service thread (now marked BUSY), or NULL + * if 
no service threads are available. Finding an idle service + * thread and marking it BUSY is atomic with respect to other + * calls to svc_pool_wake_idle_thread(). + */ +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool) +{ + struct svc_rqst *rqstp; + + rcu_read_lock(); + list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) + continue; + + rcu_read_unlock(); + WRITE_ONCE(rqstp->rq_qtime, ktime_get()); + wake_up_process(rqstp->rq_task); + percpu_counter_inc(&pool->sp_threads_woken); + return rqstp; + } + rcu_read_unlock(); + return NULL; +} + /* * Choose a pool in which to create a new thread, for svc_set_num_threads */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 62c7919ea610..89302bf09b77 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -455,8 +455,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { + struct svc_rqst *rqstp; struct svc_pool *pool; - struct svc_rqst *rqstp = NULL; if (!svc_xprt_ready(xprt)) return; @@ -476,20 +476,10 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); spin_unlock_bh(&pool->sp_lock); - /* find a thread for this xprt */ - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - percpu_counter_inc(&pool->sp_threads_woken); - rqstp->rq_qtime = ktime_get(); - wake_up_process(rqstp->rq_task); - goto out_unlock; - } - set_bit(SP_CONGESTED, &pool->sp_flags); - rqstp = NULL; -out_unlock: - rcu_read_unlock(); + rqstp = svc_pool_wake_idle_thread(pool); + if (!rqstp) + set_bit(SP_CONGESTED, &pool->sp_flags); + trace_svc_xprt_enqueue(xprt, rqstp); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); @@ -581,7 +571,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp) svc_xprt_put(xprt); } -/* +/** + * svc_wake_up - Wake up a service thread for 
non-transport work + * @serv: RPC service + * * Some svc_serv's will have occasional work to do, even when a xprt is not * waiting to be serviced. This function is there to "kick" a task in one of * those services so that it can wake up and do that work. Note that we only @@ -590,27 +583,18 @@ static void svc_xprt_release(struct svc_rqst *rqstp) */ void svc_wake_up(struct svc_serv *serv) { + struct svc_pool *pool = &serv->sv_pools[0]; struct svc_rqst *rqstp; - struct svc_pool *pool; - pool = &serv->sv_pools[0]; - - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - /* skip any that aren't queued */ - if (test_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - rcu_read_unlock(); - wake_up_process(rqstp->rq_task); - trace_svc_wake_up(rqstp->rq_task->pid); + rqstp = svc_pool_wake_idle_thread(pool); + if (!rqstp) { + set_bit(SP_TASK_PENDING, &pool->sp_flags); + smp_wmb(); + trace_svc_wake_up(0); return; } - rcu_read_unlock(); - /* No free entries available */ - set_bit(SP_TASK_PENDING, &pool->sp_flags); - smp_wmb(); - trace_svc_wake_up(0); + trace_svc_wake_up(rqstp->rq_task->pid); } EXPORT_SYMBOL_GPL(svc_wake_up); From patchwork Tue Jul 4 00:07:42 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13300546 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 06C91EB64DD for ; Tue, 4 Jul 2023 00:07:48 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230294AbjGDAHr (ORCPT ); Mon, 3 Jul 2023 20:07:47 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56210 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229504AbjGDAHq (ORCPT ); Mon, 3 Jul 2023 20:07:46 -0400 Received: from 
dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4E0A418C for ; Mon, 3 Jul 2023 17:07:45 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id B57D761086 for ; Tue, 4 Jul 2023 00:07:44 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id ABDC7C433C7; Tue, 4 Jul 2023 00:07:43 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1688429264; bh=YfKIoZi1v+wsiRs6nMLcZQRlR3YuXX7ebipeFDlD49w=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=qSHCTTLI2L/mNprD0FXVPiCN98RShy6NBVPBKfMnalmYzl2vPZ1/SLBXSrqIMSlXz onvSbL3jvucqxTf2AVVBAzjBRAahjhQs0CucbjjMsugsSCfS5br5f03D8l7ILVb2uF snXyPxtGWU2mRCqzDCG2d2k38lm0hmODQ3SjZZLQDJPX4C4Vz7H57YhUw2BRBHXvp5 nSCgPauMLtjgPuC5R0d9ZnjXrksMjbxvgqFxyUWgFt3SjCb2ej5LcoJg1Wv0DcXR8V WtBL9J45UuqZrSip+tAZcmn4BAXI/aR8oYecgDkixy+FHWWxk4WTAevbVBPfWAcAM5 BsuCP7rxR4chw== Subject: [PATCH v2 2/9] SUNRPC: Report when no service thread is available. From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 03 Jul 2023 20:07:42 -0400 Message-ID: <168842926269.139194.752143439158887888.stgit@manet.1015granger.net> In-Reply-To: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> References: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Count and record thread pool starvation. Administrators can take action by increasing thread count or decreasing workload. 
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 5 +++- include/trace/events/sunrpc.h | 49 ++++++++++++++++++++++++++++++++++------- net/sunrpc/svc.c | 9 +++++++- net/sunrpc/svc_xprt.c | 22 ++++++++++-------- 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dc2d90a655e2..fbfe6ea737c8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -22,7 +22,6 @@ #include /* - * * RPC service thread pool. * * Pool of threads and temporary sockets. Generally there is only @@ -42,6 +41,7 @@ struct svc_pool { struct percpu_counter sp_sockets_queued; struct percpu_counter sp_threads_woken; struct percpu_counter sp_threads_timedout; + struct percpu_counter sp_threads_starved; #define SP_TASK_PENDING (0) /* still work to do even if no * xprt is queued. */ @@ -427,7 +427,8 @@ int svc_register(const struct svc_serv *, struct net *, const int, void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); -struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool); +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, + struct svc_pool *pool); struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); const char * svc_proc_name(const struct svc_rqst *rqstp); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 43711753616a..9b70fc1c698a 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1994,21 +1994,21 @@ TRACE_EVENT(svc_xprt_create_err, TRACE_EVENT(svc_xprt_enqueue, TP_PROTO( const struct svc_xprt *xprt, - const struct svc_rqst *rqst + const struct svc_rqst *wakee ), - TP_ARGS(xprt, rqst), + TP_ARGS(xprt, wakee), TP_STRUCT__entry( SVC_XPRT_ENDPOINT_FIELDS(xprt) - __field(int, pid) + __field(pid_t, pid) ), TP_fast_assign( SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); - __entry->pid = rqst? 
rqst->rq_task->pid : 0; + __entry->pid = wakee->rq_task->pid; ), TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d", @@ -2039,6 +2039,39 @@ TRACE_EVENT(svc_xprt_dequeue, SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup) ); +#define show_svc_pool_flags(x) \ + __print_flags(x, "|", \ + { BIT(SP_TASK_PENDING), "TASK_PENDING" }, \ + { BIT(SP_CONGESTED), "CONGESTED" }) + +TRACE_EVENT(svc_pool_starved, + TP_PROTO( + const struct svc_serv *serv, + const struct svc_pool *pool + ), + + TP_ARGS(serv, pool), + + TP_STRUCT__entry( + __string(name, serv->sv_name) + __field(int, pool_id) + __field(unsigned int, nrthreads) + __field(unsigned long, flags) + ), + + TP_fast_assign( + __assign_str(name, serv->sv_name); + __entry->pool_id = pool->sp_id; + __entry->nrthreads = pool->sp_nrthreads; + __entry->flags = pool->sp_flags; + ), + + TP_printk("service=%s pool=%d flags=%s nrthreads=%u", + __get_str(name), __entry->pool_id, + show_svc_pool_flags(__entry->flags), __entry->nrthreads + ) +); + DECLARE_EVENT_CLASS(svc_xprt_event, TP_PROTO( const struct svc_xprt *xprt @@ -2109,16 +2142,16 @@ TRACE_EVENT(svc_xprt_accept, ); TRACE_EVENT(svc_wake_up, - TP_PROTO(int pid), + TP_PROTO(const struct svc_rqst *wakee), - TP_ARGS(pid), + TP_ARGS(wakee), TP_STRUCT__entry( - __field(int, pid) + __field(pid_t, pid) ), TP_fast_assign( - __entry->pid = pid; + __entry->pid = wakee->rq_task->pid; ), TP_printk("pid=%d", __entry->pid) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e81ce5f76abd..04151e22ec44 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -516,6 +516,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); + percpu_counter_init(&pool->sp_threads_starved, 0, GFP_KERNEL); } return serv; @@ -591,6 +592,7 @@ svc_destroy(struct kref *ref) 
percpu_counter_destroy(&pool->sp_sockets_queued); percpu_counter_destroy(&pool->sp_threads_woken); percpu_counter_destroy(&pool->sp_threads_timedout); + percpu_counter_destroy(&pool->sp_threads_starved); } kfree(serv->sv_pools); kfree(serv); @@ -691,6 +693,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) /** * svc_pool_wake_idle_thread - wake an idle thread in @pool + * @serv: RPC service * @pool: service thread pool * * Returns an idle service thread (now marked BUSY), or NULL @@ -698,7 +701,8 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) * thread and marking it BUSY is atomic with respect to other * calls to svc_pool_wake_idle_thread(). */ -struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool) +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, + struct svc_pool *pool) { struct svc_rqst *rqstp; @@ -714,6 +718,9 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool) return rqstp; } rcu_read_unlock(); + + trace_svc_pool_starved(serv, pool); + percpu_counter_inc(&pool->sp_threads_starved); return NULL; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 89302bf09b77..a1ed6fb69793 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -455,7 +455,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { - struct svc_rqst *rqstp; + struct svc_rqst *rqstp; struct svc_pool *pool; if (!svc_xprt_ready(xprt)) @@ -476,9 +476,11 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); spin_unlock_bh(&pool->sp_lock); - rqstp = svc_pool_wake_idle_thread(pool); - if (!rqstp) + rqstp = svc_pool_wake_idle_thread(xprt->xpt_server, pool); + if (!rqstp) { set_bit(SP_CONGESTED, &pool->sp_flags); + return; + } trace_svc_xprt_enqueue(xprt, rqstp); } @@ -584,17 +586,16 @@ static void svc_xprt_release(struct svc_rqst *rqstp) void svc_wake_up(struct svc_serv *serv) { 
struct svc_pool *pool = &serv->sv_pools[0]; - struct svc_rqst *rqstp; + struct svc_rqst *rqstp; - rqstp = svc_pool_wake_idle_thread(pool); + rqstp = svc_pool_wake_idle_thread(serv, pool); if (!rqstp) { set_bit(SP_TASK_PENDING, &pool->sp_flags); smp_wmb(); - trace_svc_wake_up(0); return; } - trace_svc_wake_up(rqstp->rq_task->pid); + trace_svc_wake_up(rqstp); } EXPORT_SYMBOL_GPL(svc_wake_up); @@ -1436,16 +1437,17 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) struct svc_pool *pool = p; if (p == SEQ_START_TOKEN) { - seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n"); + seq_puts(m, "# pool packets-arrived xprts-enqueued threads-woken threads-timedout starved\n"); return 0; } - seq_printf(m, "%u %llu %llu %llu %llu\n", + seq_printf(m, "%u %llu %llu %llu %llu %llu\n", pool->sp_id, percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_threads_woken), - percpu_counter_sum_positive(&pool->sp_threads_timedout)); + percpu_counter_sum_positive(&pool->sp_threads_timedout), + percpu_counter_sum_positive(&pool->sp_threads_starved)); return 0; } From patchwork Tue Jul 4 00:07:49 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13300547 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3DA0AEB64DD for ; Tue, 4 Jul 2023 00:07:54 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230295AbjGDAHx (ORCPT ); Mon, 3 Jul 2023 20:07:53 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56248 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229504AbjGDAHw (ORCPT ); Mon, 3 Jul 2023 
20:07:52 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C2AB4189 for ; Mon, 3 Jul 2023 17:07:51 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 4A68661088 for ; Tue, 4 Jul 2023 00:07:51 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 4530AC433C9; Tue, 4 Jul 2023 00:07:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1688429270; bh=6tgxkoGwbdPWA2kjPEvno7uq9F/lgC1ObR+QkLawEUc=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=bQCslDhaDObXeTGA9Qq+wK5p3Oh/x5Q+ByIoNF+NsXRnJog2bREd+tuW+z6mgI0Eu wyRZT84X8e0iL2wCk91aYIZ8SzkV0gruJGGUrmVqxqq8l3ZnXkGV5u2fE4pumT1PdX xylnk9ZvnjcMNW7ABAQkDdd17NarUAQeYLZJPZ1gkID64Sqi0thNaYi/AhpSpJm2lj edvmxmT2trqIrIFXE5AvSmvyTsst5l5dBQkXVEDKRaL/jjK3HRpCTdELeZYj81WFQa 9/ZnZtGb6uVgACpZjgMFHD4mWkQJgoloxXpkRtvh6wlXWKlOG/OlEDxyx2/xPSj760 m+J78WNUheyfQ== Subject: [PATCH v2 3/9] SUNRPC: Split the svc_xprt_dequeue tracepoint From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 03 Jul 2023 20:07:49 -0400 Message-ID: <168842926933.139194.3943503423705857676.stgit@manet.1015granger.net> In-Reply-To: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> References: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Distinguish between the case where new work was picked up just by looking at the transport queue versus when the thread was awoken. 
This gives us better visibility about how well-utilized the thread pool is. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 47 +++++++++++++++++++++++++++++++---------- net/sunrpc/svc_xprt.c | 9 +++++--- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 9b70fc1c698a..2e83887b58cd 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2015,34 +2015,57 @@ TRACE_EVENT(svc_xprt_enqueue, SVC_XPRT_ENDPOINT_VARARGS, __entry->pid) ); -TRACE_EVENT(svc_xprt_dequeue, +#define show_svc_pool_flags(x) \ + __print_flags(x, "|", \ + { BIT(SP_TASK_PENDING), "TASK_PENDING" }, \ + { BIT(SP_CONGESTED), "CONGESTED" }) + +DECLARE_EVENT_CLASS(svc_pool_scheduler_class, TP_PROTO( - const struct svc_rqst *rqst + const struct svc_rqst *rqstp ), - TP_ARGS(rqst), + TP_ARGS(rqstp), TP_STRUCT__entry( - SVC_XPRT_ENDPOINT_FIELDS(rqst->rq_xprt) + SVC_XPRT_ENDPOINT_FIELDS(rqstp->rq_xprt) + __string(name, rqstp->rq_server->sv_name) + __field(int, pool_id) + __field(unsigned int, nrthreads) + __field(unsigned long, pool_flags) __field(unsigned long, wakeup) ), TP_fast_assign( - SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt); + struct svc_pool *pool = rqstp->rq_pool; + SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqstp->rq_xprt); + __assign_str(name, rqstp->rq_server->sv_name); + __entry->pool_id = pool->sp_id; + __entry->nrthreads = pool->sp_nrthreads; + __entry->pool_flags = pool->sp_flags; __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), - rqst->rq_qtime)); + rqstp->rq_qtime)); ), - TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu", - SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT + " service=%s pool=%d pool_flags=%s nrthreads=%u wakeup-us=%lu", + SVC_XPRT_ENDPOINT_VARARGS, __get_str(name), __entry->pool_id, + show_svc_pool_flags(__entry->pool_flags), __entry->nrthreads, + __entry->wakeup + ) ); -#define show_svc_pool_flags(x) \ - __print_flags(x, "|", \ 
- { BIT(SP_TASK_PENDING), "TASK_PENDING" }, \ - { BIT(SP_CONGESTED), "CONGESTED" }) +#define DEFINE_SVC_POOL_SCHEDULER_EVENT(name) \ + DEFINE_EVENT(svc_pool_scheduler_class, svc_pool_##name, \ + TP_PROTO( \ + const struct svc_rqst *rqstp \ + ), \ + TP_ARGS(rqstp)) + +DEFINE_SVC_POOL_SCHEDULER_EVENT(polled); +DEFINE_SVC_POOL_SCHEDULER_EVENT(awoken); TRACE_EVENT(svc_pool_starved, TP_PROTO( diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a1ed6fb69793..7ee095d03996 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -744,8 +744,10 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) WARN_ON_ONCE(rqstp->rq_xprt); rqstp->rq_xprt = svc_xprt_dequeue(pool); - if (rqstp->rq_xprt) + if (rqstp->rq_xprt) { + trace_svc_pool_polled(rqstp); goto out_found; + } /* * We have to be able to interrupt this wait @@ -767,8 +769,10 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) set_bit(RQ_BUSY, &rqstp->rq_flags); smp_mb__after_atomic(); rqstp->rq_xprt = svc_xprt_dequeue(pool); - if (rqstp->rq_xprt) + if (rqstp->rq_xprt) { + trace_svc_pool_awoken(rqstp); goto out_found; + } if (!time_left) percpu_counter_inc(&pool->sp_threads_timedout); @@ -784,7 +788,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) rqstp->rq_chandle.thread_wait = 5*HZ; else rqstp->rq_chandle.thread_wait = 1*HZ; - trace_svc_xprt_dequeue(rqstp); return rqstp->rq_xprt; } From patchwork Tue Jul 4 00:07:55 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13300548 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A9679EB64DD for ; Tue, 4 Jul 2023 00:08:00 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org 
via listexpand id S229844AbjGDAH7 (ORCPT ); Mon, 3 Jul 2023 20:07:59 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56272 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229504AbjGDAH7 (ORCPT ); Mon, 3 Jul 2023 20:07:59 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 3EFFF189 for ; Mon, 3 Jul 2023 17:07:58 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id D1C6C6101C for ; Tue, 4 Jul 2023 00:07:57 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id CF11CC433C7; Tue, 4 Jul 2023 00:07:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1688429277; bh=Pd5LJz7RnhNldHfozSUpV8hFuK5ZHPY0lcE/FPM6KwU=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=Cr5RlX82MJv24GfKUU4NjL0fCPlTKdByPRpbD/dgHa5m9WwIde+Axb3ochDD3CLqZ Ph7fqftp9NKL5eyzzmbgw+YpTR07wzlkMUSq1hDcEQHdR0Cyvp45JZZ7yojuFE5l8r MGRelWobKgASB/oP96eM3TxRrfILV2/pYRRzQ6SUzSFWcF+wK0zWdNEjkmOMCrzJa+ 6AferQOoAMQzkV8d2wUYSWLYSdeYrGFZEM3O/oj9yZIYBFz3d1QkFeWB3JlMnV95rv Sv3jdPU89J/dc9p5VnKSSHn0QOJshnWxm7aXRnxtNesGPNBxPW65pOm3FECW8plfHr Wc7hFUwkTdfTA== Subject: [PATCH v2 4/9] SUNRPC: Count ingress RPC messages per svc_pool From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 03 Jul 2023 20:07:55 -0400 Message-ID: <168842927591.139194.16920372497489479670.stgit@manet.1015granger.net> In-Reply-To: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> References: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 
Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever To get a sense of the average number of transport enqueue operations needed to process an incoming RPC message, re-use the "packets" pool stat. Track the number of complete RPC messages processed by each thread pool. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 2 ++ net/sunrpc/svc_xprt.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fbfe6ea737c8..74ea13270679 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -38,6 +38,7 @@ struct svc_pool { struct list_head sp_all_threads; /* all server threads */ /* statistics on pool operation */ + struct percpu_counter sp_messages_arrived; struct percpu_counter sp_sockets_queued; struct percpu_counter sp_threads_woken; struct percpu_counter sp_threads_timedout; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 04151e22ec44..ccc7ff3142dd 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -513,6 +513,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, INIT_LIST_HEAD(&pool->sp_all_threads); spin_lock_init(&pool->sp_lock); + percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); @@ -589,6 +590,7 @@ svc_destroy(struct kref *ref) for (i = 0; i < serv->sv_nrpools; i++) { struct svc_pool *pool = &serv->sv_pools[i]; + percpu_counter_destroy(&pool->sp_messages_arrived); percpu_counter_destroy(&pool->sp_sockets_queued); percpu_counter_destroy(&pool->sp_threads_woken); percpu_counter_destroy(&pool->sp_threads_timedout); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 7ee095d03996..ecbccf0d89b9 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -897,6 
+897,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; + percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived); rqstp->rq_stime = ktime_get(); return len; out_release: @@ -1446,7 +1447,7 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) seq_printf(m, "%u %llu %llu %llu %llu %llu\n", pool->sp_id, - percpu_counter_sum_positive(&pool->sp_sockets_queued), + percpu_counter_sum_positive(&pool->sp_messages_arrived), percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_threads_woken), percpu_counter_sum_positive(&pool->sp_threads_timedout), From patchwork Tue Jul 4 00:08:02 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13300549 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id EE2B3EB64DC for ; Tue, 4 Jul 2023 00:08:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230021AbjGDAIG (ORCPT ); Mon, 3 Jul 2023 20:08:06 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56304 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229504AbjGDAIF (ORCPT ); Mon, 3 Jul 2023 20:08:05 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D6C65189 for ; Mon, 3 Jul 2023 17:08:04 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 6C9476101C for ; Tue, 4 Jul 2023 00:08:04 +0000 (UTC) Received: 
by smtp.kernel.org (Postfix) with ESMTPSA id 6AEA8C433C7; Tue, 4 Jul 2023 00:08:03 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1688429283; bh=8Ajmxa2wT4fPesgmRnmvkKnHxqn/kb+9NNZidM0+W4o=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=Pywrwgvv4j5tY0kd5D2+HfFbLnuhn8kAR7weUkNiZXLLpugdY7W/zEPcWkSbz9QQu e8LXtIwr0KDSTkFRa10uMxPUYPFtzols1PGzRL4Bk53FihB3ngtDdPoUg5JjVIwXO6 LxXaTUwzMBxapJ1Kuplh4dG6Uv9buPb3z8jzaHp8PtiFFyaJGD6Bt5swvO3+QMdWht H+zKV70HNlvmRc1m3Z9t+AvPDrQ9ZV7LAB9QbO7DjrA0F7mMUZ/wt6ukei/SFc+1yn AP0QzXP6Vljlp4B2kJpjv5Q3Gy5b2N01YRJ4DKH48BhafPemoioZXFnSp6d8399ozh REVMP2bQuSt8g== Subject: [PATCH v2 5/9] SUNRPC: Clean up svc_set_num_threads From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 03 Jul 2023 20:08:02 -0400 Message-ID: <168842928247.139194.9631504614280555265.stgit@manet.1015granger.net> In-Reply-To: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> References: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Document the API contract and remove stale or obvious comments. 
Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 60 +++++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index ccc7ff3142dd..42d38cd99f29 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -726,23 +726,14 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, return NULL; } -/* - * Choose a pool in which to create a new thread, for svc_set_num_threads - */ -static inline struct svc_pool * -choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +static struct svc_pool * +svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) { - if (pool != NULL) - return pool; - - return &serv->sv_pools[(*state)++ % serv->sv_nrpools]; + return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools]; } -/* - * Choose a thread to kill, for svc_set_num_threads - */ -static inline struct task_struct * -choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +static struct task_struct * +svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) { unsigned int i; struct task_struct *task = NULL; @@ -750,7 +741,6 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) if (pool != NULL) { spin_lock_bh(&pool->sp_lock); } else { - /* choose a pool in round-robin fashion */ for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; spin_lock_bh(&pool->sp_lock); @@ -765,21 +755,15 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) if (!list_empty(&pool->sp_all_threads)) { struct svc_rqst *rqstp; - /* - * Remove from the pool->sp_all_threads list - * so we don't try to kill it again. 
- */ rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); set_bit(RQ_VICTIM, &rqstp->rq_flags); list_del_rcu(&rqstp->rq_all); task = rqstp->rq_task; } spin_unlock_bh(&pool->sp_lock); - return task; } -/* create new threads */ static int svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -791,13 +775,12 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) do { nrservs--; - chosen_pool = choose_pool(serv, pool, &state); - + chosen_pool = svc_pool_next(serv, pool, &state); node = svc_pool_map_get_node(chosen_pool->sp_id); + rqstp = svc_prepare_thread(serv, chosen_pool, node); if (IS_ERR(rqstp)) return PTR_ERR(rqstp); - task = kthread_create_on_node(serv->sv_threadfn, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { @@ -816,15 +799,6 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } -/* - * Create or destroy enough new threads to make the number - * of threads the given number. If `pool' is non-NULL, applies - * only to threads in that pool, otherwise round-robins between - * all pools. Caller must ensure that mutual exclusion between this and - * server startup or shutdown. 
- */ - -/* destroy old threads */ static int svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -832,9 +806,8 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) struct task_struct *task; unsigned int state = serv->sv_nrthreads-1; - /* destroy old threads */ do { - task = choose_victim(serv, pool, &state); + task = svc_pool_victim(serv, pool, &state); if (task == NULL) break; rqstp = kthread_data(task); @@ -846,6 +819,23 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } +/** + * svc_set_num_threads - adjust number of threads per RPC service + * @serv: RPC service to adjust + * @pool: Specific pool from which to choose threads, or NULL + * @nrservs: New number of threads for @pool, or for all of @serv if @pool is NULL (0 or less means kill all threads) + * + * Create or destroy threads to make the number of threads in @pool (or in + * all of @serv when @pool is NULL) the given number. When @pool is NULL, + * threads are chosen round-robin from all pools for @serv. @serv's + * sv_nrthreads is adjusted for each thread created or destroyed. + * + * Caller must ensure mutual exclusion between this and server startup or + * shutdown. + * + * Returns zero on success or a negative errno if an error occurred while + * starting a thread. 
+ */ int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { From patchwork Tue Jul 4 00:08:09 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13300550 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 84B27EB64DC for ; Tue, 4 Jul 2023 00:08:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229793AbjGDAIN (ORCPT ); Mon, 3 Jul 2023 20:08:13 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56334 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229504AbjGDAIM (ORCPT ); Mon, 3 Jul 2023 20:08:12 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5840018C for ; Mon, 3 Jul 2023 17:08:11 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id EAD9B6101C for ; Tue, 4 Jul 2023 00:08:10 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id EB3CAC433C9; Tue, 4 Jul 2023 00:08:09 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1688429290; bh=2AKlM2UKSM/EniRYCYKt9OAzmDYjbBy3x9koUY76S6Q=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=pyZgcqz5dV42Dk/xIfpJnKJ8++rTXnp5pt9DxwmLLn23x+uRCJI2FBRCBFlwjUfNl EVr6OqBcOlyFnGacMH5NhMrjtHzc7BCPd9hcZLzhE2k9HqZ3WKWdA7BKDMcD9zTLxW exs7q5xbJBjhCFPtZuHEtrgrtdh0X/Z0Ky5T4BiRHBxWPCt/MkVVj9i5/yCRYuPhtH zDHz8P/TOSBYG6qwjqcuAIm3LNZrQxZTh7o3If5LYWeJYAQjb4q5RsQFihfK9nLqz/ 
U1RFnxBuV4N37mG36gHRXg/cP4MHPPjLnirx54UMJMIOMcbIdixUX+IB+eo5vo3NMs +QRv7ODJZB0dQ== Subject: [PATCH v2 6/9] SUNRPC: Replace dprintk() call site in __svc_create() From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 03 Jul 2023 20:08:09 -0400 Message-ID: <168842928906.139194.18404088591647355711.stgit@manet.1015granger.net> In-Reply-To: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> References: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Done as part of converting SunRPC observability from printk-style to tracepoints. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 23 +++++++++++++++++++++++ net/sunrpc/svc.c | 5 ++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2e83887b58cd..60c8e03268d4 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1918,6 +1918,29 @@ TRACE_EVENT(svc_stats_latency, __get_str(procedure), __entry->execute) ); +TRACE_EVENT(svc_pool_init, + TP_PROTO( + const struct svc_serv *serv, + const struct svc_pool *pool + ), + + TP_ARGS(serv, pool), + + TP_STRUCT__entry( + __string(name, serv->sv_name) + __field(int, pool_id) + ), + + TP_fast_assign( + __assign_str(name, serv->sv_name); + __entry->pool_id = pool->sp_id; + ), + + TP_printk("service=%s pool=%d", + __get_str(name), __entry->pool_id + ) +); + #define show_svc_xprt_flags(flags) \ __print_flags(flags, "|", \ { BIT(XPT_BUSY), "BUSY" }, \ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 42d38cd99f29..9b6701a38e71 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -505,9 +505,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, for (i = 0; i < serv->sv_nrpools; 
i++) { struct svc_pool *pool = &serv->sv_pools[i]; - dprintk("svc: initialising pool %u for %s\n", - i, serv->sv_name); - pool->sp_id = i; INIT_LIST_HEAD(&pool->sp_sockets); INIT_LIST_HEAD(&pool->sp_all_threads); @@ -518,6 +515,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_starved, 0, GFP_KERNEL); + + trace_svc_pool_init(serv, pool); } return serv; From patchwork Tue Jul 4 00:08:15 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13300551 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7DC0CEB64DD for ; Tue, 4 Jul 2023 00:08:20 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230334AbjGDAIT (ORCPT ); Mon, 3 Jul 2023 20:08:19 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56366 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229504AbjGDAIS (ORCPT ); Mon, 3 Jul 2023 20:08:18 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0AAC618C for ; Mon, 3 Jul 2023 17:08:18 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 9A2186108B for ; Tue, 4 Jul 2023 00:08:17 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 92349C433CA; Tue, 4 Jul 2023 00:08:16 +0000 (UTC) DKIM-Signature: 
v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1688429297; bh=zm+oq/ewIClG2atqKpuw+/NWExTCCv7xUTwq6hZ6ZMM=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=OErcOVWqOqiwM6WW/MKxqPi5kn5bGcorbWnQFNJpfWiUK+W6k93zYkExkq3d48OJS esxmU9Y3B3NhKttmJv1K54+JzMKZOgsS4QGfqW+Q14qcyfxLOH2ou6GwSOM2dkLQRn VHLfi83zC/mLcwzp3T/u3qu4V7/Xtes7mPg44VeDAkSoK44P2eEJ4xI/AbCJ/DOXsY JAYUPyiQkO198HrYuXn71FeG6HMfq3yjcK4t1y/b5cQzzMHWxQ1GuPgawChxx8My/z YtM60+1JIs7b1XW3TvB0xwhm2awtMR2A9Gm2smfmx4blQijEuqNciaUfxC7uNhWO/u GIkaMFaxjKNKw== Subject: [PATCH v2 7/9] SUNRPC: Don't disable BH's when taking sp_lock From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 03 Jul 2023 20:08:15 -0400 Message-ID: <168842929557.139194.4420161035549339648.stgit@manet.1015granger.net> In-Reply-To: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> References: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Consumers of sp_lock now all run in process context. We can avoid the overhead of disabling bottom halves when taking this lock. 
Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ecbccf0d89b9..8ced7591ce07 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -472,9 +472,9 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) pool = svc_pool_for_cpu(xprt->xpt_server); percpu_counter_inc(&pool->sp_sockets_queued); - spin_lock_bh(&pool->sp_lock); + spin_lock(&pool->sp_lock); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); - spin_unlock_bh(&pool->sp_lock); + spin_unlock(&pool->sp_lock); rqstp = svc_pool_wake_idle_thread(xprt->xpt_server, pool); if (!rqstp) { @@ -496,14 +496,14 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) if (list_empty(&pool->sp_sockets)) goto out; - spin_lock_bh(&pool->sp_lock); + spin_lock(&pool->sp_lock); if (likely(!list_empty(&pool->sp_sockets))) { xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready); list_del_init(&xprt->xpt_ready); svc_xprt_get(xprt); } - spin_unlock_bh(&pool->sp_lock); + spin_unlock(&pool->sp_lock); out: return xprt; } @@ -1116,15 +1116,15 @@ static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net) for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[i]; - spin_lock_bh(&pool->sp_lock); + spin_lock(&pool->sp_lock); list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) { if (xprt->xpt_net != net) continue; list_del_init(&xprt->xpt_ready); - spin_unlock_bh(&pool->sp_lock); + spin_unlock(&pool->sp_lock); return xprt; } - spin_unlock_bh(&pool->sp_lock); + spin_unlock(&pool->sp_lock); } return NULL; } From patchwork Tue Jul 4 00:08:22 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13300552 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from 
vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7FEEAC0015E for ; Tue, 4 Jul 2023 00:08:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230337AbjGDAI1 (ORCPT ); Mon, 3 Jul 2023 20:08:27 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56396 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229504AbjGDAI0 (ORCPT ); Mon, 3 Jul 2023 20:08:26 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 80388189 for ; Mon, 3 Jul 2023 17:08:24 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 1D09C61088 for ; Tue, 4 Jul 2023 00:08:24 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 18F53C433C9; Tue, 4 Jul 2023 00:08:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1688429303; bh=uDfcVKN0zEwGg845/QxKBljQo7DNt9BdCC2yqVAuirQ=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=Gh5sEOzFVQusxUMnro9ZDfCfvLP4agIFxM125aH0x98Gt4F1UdUrTyBemzZAUraS0 92qzmYSRVsEmFE9sHDaymaKQBekIFvCcGM4cAtEpA7cLb4PgpzACmM+zQuLwkMTJX+ PYAyYMX78sYXdztSNoU43anOltwNaUaActQeGkqROLlYehqPhfT1n2Gv/domLaS6qQ cy6LaYTNy8pRi6gK3Ufrsu6g//VDcbT5ojqY5CVKP+E1lGH89vxvyJUA9SAws9xYnN is1ewK2srnQu695YYNtA+C+SOrDNKJ++XLR+2vK/Ywo6oHBO1bjFkGbfGfERnd8oWP C9hY664RvSs4Q== Subject: [PATCH v2 8/9] SUNRPC: Replace sp_threads_all with an xarray From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 03 Jul 2023 20:08:22 -0400 Message-ID: <168842930220.139194.2628840255224609992.stgit@manet.1015granger.net> 
In-Reply-To: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> References: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever We want a thread lookup operation that can be done with RCU only, but also we want to avoid the linked-list walk, which does not scale well in the number of pool threads. BH-disabled locking is no longer necessary because we're no longer sharing the pool's sp_lock to protect either the xarray or the pool's thread count. sp_lock also protects transport activity. There are no callers of svc_set_num_threads() that run outside of process context. Signed-off-by: Chuck Lever --- fs/nfsd/nfssvc.c | 3 +- include/linux/sunrpc/svc.h | 11 +++---- include/trace/events/sunrpc.h | 47 ++++++++++++++++++++++++++++- net/sunrpc/svc.c | 67 ++++++++++++++++++++++++----------------- net/sunrpc/svc_xprt.c | 2 + 5 files changed, 93 insertions(+), 37 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 2154fa63c5f2..d42b2a40c93c 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -62,8 +62,7 @@ static __be32 nfsd_init_request(struct svc_rqst *, * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless * nn->keep_active is set). That number of nfsd threads must - * exist and each must be listed in ->sp_all_threads in some entry of - * ->sv_pools[]. + * exist and each must be listed in some entry of ->sv_pools[]. * * Each active thread holds a counted reference on nn->nfsd_serv, as does * the nn->keep_active flag and various transient calls to svc_get(). 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 74ea13270679..6f8bfcd44250 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -32,10 +32,10 @@ */ struct svc_pool { unsigned int sp_id; /* pool id; also node id on NUMA */ - spinlock_t sp_lock; /* protects all fields */ + spinlock_t sp_lock; /* protects sp_sockets */ struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ - struct list_head sp_all_threads; /* all server threads */ + struct xarray sp_thread_xa; /* statistics on pool operation */ struct percpu_counter sp_messages_arrived; @@ -195,7 +195,6 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); * processed. */ struct svc_rqst { - struct list_head rq_all; /* all threads list */ struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ struct svc_xprt * rq_xprt; /* transport ptr */ @@ -240,10 +239,10 @@ struct svc_rqst { #define RQ_SPLICE_OK (4) /* turned off in gss privacy * to prevent encrypting page * cache pages */ -#define RQ_VICTIM (5) /* about to be shut down */ -#define RQ_BUSY (6) /* request is busy */ -#define RQ_DATA (7) /* request has data */ +#define RQ_BUSY (5) /* request is busy */ +#define RQ_DATA (6) /* request has data */ unsigned long rq_flags; /* flags field */ + u32 rq_thread_id; /* xarray index */ ktime_t rq_qtime; /* enqueue time */ void * rq_argp; /* decoded arguments */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 60c8e03268d4..ea43c6059bdb 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1676,7 +1676,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto); svc_rqst_flag(USEDEFERRAL) \ svc_rqst_flag(DROPME) \ svc_rqst_flag(SPLICE_OK) \ - svc_rqst_flag(VICTIM) \ svc_rqst_flag(BUSY) \ svc_rqst_flag_end(DATA) @@ -2118,6 +2117,52 @@ TRACE_EVENT(svc_pool_starved, ) ); +DECLARE_EVENT_CLASS(svc_thread_lifetime_class, + TP_PROTO( + const struct svc_serv *serv, + const struct svc_pool 
*pool, + const struct svc_rqst *rqstp + ), + + TP_ARGS(serv, pool, rqstp), + + TP_STRUCT__entry( + __string(name, serv->sv_name) + __field(int, pool_id) + __field(unsigned int, nrthreads) + __field(unsigned long, pool_flags) + __field(u32, thread_id) + __field(const void *, rqstp) + ), + + TP_fast_assign( + __assign_str(name, serv->sv_name); + __entry->pool_id = pool->sp_id; + __entry->nrthreads = pool->sp_nrthreads; + __entry->pool_flags = pool->sp_flags; + __entry->thread_id = rqstp->rq_thread_id; + __entry->rqstp = rqstp; + ), + + TP_printk("service=%s pool=%d pool_flags=%s nrthreads=%u thread_id=%u", + __get_str(name), __entry->pool_id, + show_svc_pool_flags(__entry->pool_flags), + __entry->nrthreads, __entry->thread_id + ) +); + +#define DEFINE_SVC_THREAD_LIFETIME_EVENT(name) \ + DEFINE_EVENT(svc_thread_lifetime_class, svc_pool_##name, \ + TP_PROTO( \ + const struct svc_serv *serv, \ + const struct svc_pool *pool, \ + const struct svc_rqst *rqstp \ + ), \ + TP_ARGS(serv, pool, rqstp)) + +DEFINE_SVC_THREAD_LIFETIME_EVENT(thread_init); +DEFINE_SVC_THREAD_LIFETIME_EVENT(thread_exit); + DECLARE_EVENT_CLASS(svc_xprt_event, TP_PROTO( const struct svc_xprt *xprt diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9b6701a38e71..ef350f0d8925 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -507,8 +507,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, pool->sp_id = i; INIT_LIST_HEAD(&pool->sp_sockets); - INIT_LIST_HEAD(&pool->sp_all_threads); spin_lock_init(&pool->sp_lock); + xa_init_flags(&pool->sp_thread_xa, XA_FLAGS_ALLOC); percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); @@ -594,6 +594,8 @@ svc_destroy(struct kref *ref) percpu_counter_destroy(&pool->sp_threads_woken); percpu_counter_destroy(&pool->sp_threads_timedout); percpu_counter_destroy(&pool->sp_threads_starved); + + xa_destroy(&pool->sp_thread_xa); } kfree(serv->sv_pools); kfree(serv); @@ -674,7 
+676,11 @@ EXPORT_SYMBOL_GPL(svc_rqst_alloc); static struct svc_rqst * svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { + static const struct xa_limit limit = { + .max = U32_MAX, + }; struct svc_rqst *rqstp; + int ret; rqstp = svc_rqst_alloc(serv, pool, node); if (!rqstp) @@ -685,15 +691,25 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) serv->sv_nrthreads += 1; spin_unlock_bh(&serv->sv_lock); - spin_lock_bh(&pool->sp_lock); + xa_lock(&pool->sp_thread_xa); + ret = __xa_alloc(&pool->sp_thread_xa, &rqstp->rq_thread_id, rqstp, + limit, GFP_KERNEL); + if (ret) { + xa_unlock(&pool->sp_thread_xa); + goto out_free; + } pool->sp_nrthreads++; - list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); - spin_unlock_bh(&pool->sp_lock); + xa_unlock(&pool->sp_thread_xa); + trace_svc_pool_thread_init(serv, pool, rqstp); return rqstp; + +out_free: + svc_rqst_free(rqstp); + return ERR_PTR(ret); } /** - * svc_pool_wake_idle_thread - wake an idle thread in @pool + * svc_pool_wake_idle_thread - Find and wake an idle thread in @pool * @serv: RPC service * @pool: service thread pool * @@ -706,19 +722,17 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, struct svc_pool *pool) { struct svc_rqst *rqstp; + unsigned long index; - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + xa_for_each(&pool->sp_thread_xa, index, rqstp) { if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) continue; - rcu_read_unlock(); WRITE_ONCE(rqstp->rq_qtime, ktime_get()); wake_up_process(rqstp->rq_task); percpu_counter_inc(&pool->sp_threads_woken); return rqstp; } - rcu_read_unlock(); trace_svc_pool_starved(serv, pool); percpu_counter_inc(&pool->sp_threads_starved); @@ -734,32 +748,31 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) static struct task_struct * svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) { - unsigned int i; struct 
task_struct *task = NULL; + struct svc_rqst *rqstp; + unsigned long zero = 0; + unsigned int i; if (pool != NULL) { - spin_lock_bh(&pool->sp_lock); + xa_lock(&pool->sp_thread_xa); } else { for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; - spin_lock_bh(&pool->sp_lock); - if (!list_empty(&pool->sp_all_threads)) + xa_lock(&pool->sp_thread_xa); + if (!xa_empty(&pool->sp_thread_xa)) goto found_pool; - spin_unlock_bh(&pool->sp_lock); + xa_unlock(&pool->sp_thread_xa); } return NULL; } found_pool: - if (!list_empty(&pool->sp_all_threads)) { - struct svc_rqst *rqstp; - - rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); - set_bit(RQ_VICTIM, &rqstp->rq_flags); - list_del_rcu(&rqstp->rq_all); + rqstp = xa_find(&pool->sp_thread_xa, &zero, U32_MAX, XA_PRESENT); + if (rqstp) { + __xa_erase(&pool->sp_thread_xa, rqstp->rq_thread_id); task = rqstp->rq_task; } - spin_unlock_bh(&pool->sp_lock); + xa_unlock(&pool->sp_thread_xa); return task; } @@ -841,9 +854,9 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) if (pool == NULL) { nrservs -= serv->sv_nrthreads; } else { - spin_lock_bh(&pool->sp_lock); + xa_lock(&pool->sp_thread_xa); nrservs -= pool->sp_nrthreads; - spin_unlock_bh(&pool->sp_lock); + xa_unlock(&pool->sp_thread_xa); } if (nrservs > 0) @@ -930,11 +943,11 @@ svc_exit_thread(struct svc_rqst *rqstp) struct svc_serv *serv = rqstp->rq_server; struct svc_pool *pool = rqstp->rq_pool; - spin_lock_bh(&pool->sp_lock); + xa_lock(&pool->sp_thread_xa); pool->sp_nrthreads--; - if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags)) - list_del_rcu(&rqstp->rq_all); - spin_unlock_bh(&pool->sp_lock); + __xa_erase(&pool->sp_thread_xa, rqstp->rq_thread_id); + xa_unlock(&pool->sp_thread_xa); + trace_svc_pool_thread_exit(serv, pool, rqstp); spin_lock_bh(&serv->sv_lock); serv->sv_nrthreads -= 1; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8ced7591ce07..7709120b45c1 100644 --- 
a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -46,7 +46,7 @@ static LIST_HEAD(svc_xprt_class_list); /* SMP locking strategy: * - * svc_pool->sp_lock protects most of the fields of that pool. + * svc_pool->sp_lock protects sp_sockets. * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. * The "service mutex" protects svc_serv->sv_nrthread. From patchwork Tue Jul 4 00:08:28 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13300553 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D0EA9EB64DC for ; Tue, 4 Jul 2023 00:08:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230385AbjGDAId (ORCPT ); Mon, 3 Jul 2023 20:08:33 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56432 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230331AbjGDAIc (ORCPT ); Mon, 3 Jul 2023 20:08:32 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2E71518D for ; Mon, 3 Jul 2023 17:08:31 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 9FB626108B for ; Tue, 4 Jul 2023 00:08:30 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9B8D5C433C9; Tue, 4 Jul 2023 00:08:29 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1688429310; 
bh=/5UI+01295S2ivNp5HMKHFl/RhWyCK6lpXV6Z7atyNU=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=E5J6DyhJgmhor7QR63Dl4+YH2YMAdzI6MmN+IGD3kSZOxBVHEdjoXDu6LN5rHEQ6a QEX6/XpmnG8Mt3tpsmiGS2E8A3Ymu5RaRHLagU6ha3+fbOseSCS3DHsLHEEZZSu1tV h9OnIxJOeOaE+wlaUn8Xzinhee0ZO5iE/6RsLXGR9nrR4Mj0Ut/ctXenefCoFXXvbG dH5PvpXi22UsQHU3Q7Iagt9MoS8nF0gwYlGhW3Zg8KKKfyGzVSDJNQig6knyQumb8G qfyghWnyjcJV0K67mB48zmlgbLQxg6UpmpKNjXnpg6RvoWW/A5I6UZ+CC2OPqRSkDS VXdJ6Bp9B8Rtg== Subject: [PATCH v2 9/9] SUNRPC: Convert RQ_BUSY into a per-pool bitmap From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 03 Jul 2023 20:08:28 -0400 Message-ID: <168842930872.139194.10164846167275218299.stgit@manet.1015granger.net> In-Reply-To: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> References: <168842897573.139194.15893960758088950748.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever I've noticed that client-observed server request latency goes up simply when the nfsd thread count is increased. List walking is known to be memory-inefficient. On a busy server with many threads, enqueuing a transport will walk the "all threads" list quite frequently. This also pulls in the cache lines for some hot fields in each svc_rqst (namely, rq_flags). The svc_xprt_enqueue() call that concerns me most is the one in svc_rdma_wc_receive(), which is single-threaded per CQ. Slowing down completion handling limits the total throughput per RDMA connection. So, avoid walking the "all threads" list to find an idle thread to wake. Instead, set up an idle bitmap and use find_next_bit, which should work the same way as RQ_BUSY but it will touch only the cachelines that the bitmap is in. Stick with atomic bit operations to avoid taking the pool lock. 
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 6 ++++-- include/trace/events/sunrpc.h | 1 - net/sunrpc/svc.c | 27 +++++++++++++++++++++------ net/sunrpc/svc_xprt.c | 30 ++++++++++++++++++++++++------ 4 files changed, 49 insertions(+), 15 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6f8bfcd44250..27ffcf7371d0 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -35,6 +35,7 @@ struct svc_pool { spinlock_t sp_lock; /* protects sp_sockets */ struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ + unsigned long *sp_idle_map; /* idle threads */ struct xarray sp_thread_xa; /* statistics on pool operation */ @@ -190,6 +191,8 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); #define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ + 2 + 1) +#define RPCSVC_MAXPOOLTHREADS (4096) + /* * The context of a single thread, including the request currently being * processed. 
@@ -239,8 +242,7 @@ struct svc_rqst { #define RQ_SPLICE_OK (4) /* turned off in gss privacy * to prevent encrypting page * cache pages */ -#define RQ_BUSY (5) /* request is busy */ -#define RQ_DATA (6) /* request has data */ +#define RQ_DATA (5) /* request has data */ unsigned long rq_flags; /* flags field */ u32 rq_thread_id; /* xarray index */ ktime_t rq_qtime; /* enqueue time */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ea43c6059bdb..c07824a254bf 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1676,7 +1676,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto); svc_rqst_flag(USEDEFERRAL) \ svc_rqst_flag(DROPME) \ svc_rqst_flag(SPLICE_OK) \ - svc_rqst_flag(BUSY) \ svc_rqst_flag_end(DATA) #undef svc_rqst_flag diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index ef350f0d8925..d0278e5190ba 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -509,6 +509,12 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, INIT_LIST_HEAD(&pool->sp_sockets); spin_lock_init(&pool->sp_lock); xa_init_flags(&pool->sp_thread_xa, XA_FLAGS_ALLOC); + /* All threads initially marked "busy" */ + pool->sp_idle_map = + bitmap_zalloc_node(RPCSVC_MAXPOOLTHREADS, GFP_KERNEL, + svc_pool_map_get_node(i)); + if (!pool->sp_idle_map) + return NULL; percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); @@ -596,6 +602,8 @@ svc_destroy(struct kref *ref) percpu_counter_destroy(&pool->sp_threads_starved); xa_destroy(&pool->sp_thread_xa); + bitmap_free(pool->sp_idle_map); + pool->sp_idle_map = NULL; } kfree(serv->sv_pools); kfree(serv); @@ -647,7 +655,6 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) folio_batch_init(&rqstp->rq_fbatch); - __set_bit(RQ_BUSY, &rqstp->rq_flags); rqstp->rq_server = serv; rqstp->rq_pool = pool; @@ -677,7 +684,7 @@ static struct svc_rqst * svc_prepare_thread(struct svc_serv *serv, struct svc_pool 
*pool, int node) { static const struct xa_limit limit = { - .max = U32_MAX, + .max = RPCSVC_MAXPOOLTHREADS, }; struct svc_rqst *rqstp; int ret; @@ -722,12 +729,19 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, struct svc_pool *pool) { struct svc_rqst *rqstp; - unsigned long index; + unsigned long bit; - xa_for_each(&pool->sp_thread_xa, index, rqstp) { - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) + /* Check the pool's idle bitmap locklessly so that multiple + * idle searches can proceed concurrently. + */ + for_each_set_bit(bit, pool->sp_idle_map, pool->sp_nrthreads) { + if (!test_and_clear_bit(bit, pool->sp_idle_map)) continue; + rqstp = xa_load(&pool->sp_thread_xa, bit); + if (!rqstp) + break; + WRITE_ONCE(rqstp->rq_qtime, ktime_get()); wake_up_process(rqstp->rq_task); percpu_counter_inc(&pool->sp_threads_woken); @@ -767,7 +781,8 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *stat } found_pool: - rqstp = xa_find(&pool->sp_thread_xa, &zero, U32_MAX, XA_PRESENT); + rqstp = xa_find(&pool->sp_thread_xa, &zero, RPCSVC_MAXPOOLTHREADS, + XA_PRESENT); if (rqstp) { __xa_erase(&pool->sp_thread_xa, rqstp->rq_thread_id); task = rqstp->rq_task; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 7709120b45c1..2844b32c16ea 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -735,6 +735,25 @@ rqst_should_sleep(struct svc_rqst *rqstp) return true; } +static void svc_pool_thread_mark_idle(struct svc_pool *pool, + struct svc_rqst *rqstp) +{ + smp_mb__before_atomic(); + set_bit(rqstp->rq_thread_id, pool->sp_idle_map); + smp_mb__after_atomic(); +} + +/* + * Note: If we were awoken, then this rqstp has already been marked busy. 
+ */ +static void svc_pool_thread_mark_busy(struct svc_pool *pool, + struct svc_rqst *rqstp) +{ + smp_mb__before_atomic(); + clear_bit(rqstp->rq_thread_id, pool->sp_idle_map); + smp_mb__after_atomic(); +} + static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { struct svc_pool *pool = rqstp->rq_pool; @@ -756,18 +775,17 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) set_current_state(TASK_INTERRUPTIBLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); - clear_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); - if (likely(rqst_should_sleep(rqstp))) + if (likely(rqst_should_sleep(rqstp))) { + svc_pool_thread_mark_idle(pool, rqstp); time_left = schedule_timeout(timeout); - else + } else __set_current_state(TASK_RUNNING); try_to_freeze(); - set_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); + svc_pool_thread_mark_busy(pool, rqstp); + rqstp->rq_xprt = svc_xprt_dequeue(pool); if (rqstp->rq_xprt) { trace_svc_pool_awoken(rqstp);