From patchwork Mon Jul 10 16:42:00 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13307388 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 73462EB64D9 for ; Mon, 10 Jul 2023 16:42:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230227AbjGJQmE (ORCPT ); Mon, 10 Jul 2023 12:42:04 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45106 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230016AbjGJQmD (ORCPT ); Mon, 10 Jul 2023 12:42:03 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D449AC0 for ; Mon, 10 Jul 2023 09:42:02 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 689D161124 for ; Mon, 10 Jul 2023 16:42:02 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5C52BC433C8; Mon, 10 Jul 2023 16:42:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1689007321; bh=V7jVm1GC/C3FYiTl8Eo30S8B6hhZ+AOBMyp56VVnInc=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=nHNXX72OJ8iuOXwpdj/B6hEFgOf57RAPCptxKVU+6h0oX3OtV468gEnpartQhkXAa 7bYqdm9FMVg+g19zn2dfaBRCNMeJ2XKSaY8vMvSZxXw7U+UWL6AUUHTSTyNfpfSf3c G/IQF6mrr3xD/COZIgg9m8+aa2WB+wzL4l/q1ih+BQJ3/ttgcmi5/Mx146RP+4sBiT 4oJxzJh66HuDToaUdu7kp0o/UnY1ySgRbWzY2lSs3/p3WzYehdsfYI3sTUF5iiepb5 nxDW6r/UJIpww/5LKUUze2sqd5qhE95Sit1DPd/tJ32iX2Qg0RW6ahTxXHDfvv1iFw 
7EMq+11kZmBtw== Subject: [PATCH v3 1/9] SUNRPC: Deduplicate thread wake-up code From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 10 Jul 2023 12:42:00 -0400 Message-ID: <168900732034.7514.4396138579922803024.stgit@manet.1015granger.net> In-Reply-To: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> References: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Refactor: Extract the loop that finds an idle service thread from svc_xprt_enqueue() and svc_wake_up(). Both functions do just about the same thing. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 28 ++++++++++++++++++++++++++ net/sunrpc/svc_xprt.c | 48 +++++++++++++++----------------------------- 3 files changed, 45 insertions(+), 32 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f8751118c122..dc2d90a655e2 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -427,6 +427,7 @@ int svc_register(const struct svc_serv *, struct net *, const int, void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool); struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); const char * svc_proc_name(const struct svc_rqst *rqstp); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 587811a002c9..05ee92b5fa1e 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -689,6 +689,34 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) return rqstp; } +/** + * svc_pool_wake_idle_thread - Awaken an idle thread in @pool + * @pool: service thread pool + * + * Returns an idle service thread (now 
marked BUSY), or NULL + * if no service threads are available. Finding an idle service + * thread and marking it BUSY is atomic with respect to other + * calls to svc_pool_wake_idle_thread(). + */ +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool) +{ + struct svc_rqst *rqstp; + + rcu_read_lock(); + list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) + continue; + + rcu_read_unlock(); + WRITE_ONCE(rqstp->rq_qtime, ktime_get()); + wake_up_process(rqstp->rq_task); + percpu_counter_inc(&pool->sp_threads_woken); + return rqstp; + } + rcu_read_unlock(); + return NULL; +} + /* * Choose a pool in which to create a new thread, for svc_set_num_threads */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 62c7919ea610..89302bf09b77 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -455,8 +455,8 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { + struct svc_rqst *rqstp; struct svc_pool *pool; - struct svc_rqst *rqstp = NULL; if (!svc_xprt_ready(xprt)) return; @@ -476,20 +476,10 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); spin_unlock_bh(&pool->sp_lock); - /* find a thread for this xprt */ - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - percpu_counter_inc(&pool->sp_threads_woken); - rqstp->rq_qtime = ktime_get(); - wake_up_process(rqstp->rq_task); - goto out_unlock; - } - set_bit(SP_CONGESTED, &pool->sp_flags); - rqstp = NULL; -out_unlock: - rcu_read_unlock(); + rqstp = svc_pool_wake_idle_thread(pool); + if (!rqstp) + set_bit(SP_CONGESTED, &pool->sp_flags); + trace_svc_xprt_enqueue(xprt, rqstp); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); @@ -581,7 +571,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp) svc_xprt_put(xprt); } -/* +/** + * svc_wake_up - Wake up a 
service thread for non-transport work + * @serv: RPC service + * * Some svc_serv's will have occasional work to do, even when a xprt is not * waiting to be serviced. This function is there to "kick" a task in one of * those services so that it can wake up and do that work. Note that we only @@ -590,27 +583,18 @@ static void svc_xprt_release(struct svc_rqst *rqstp) */ void svc_wake_up(struct svc_serv *serv) { + struct svc_pool *pool = &serv->sv_pools[0]; struct svc_rqst *rqstp; - struct svc_pool *pool; - pool = &serv->sv_pools[0]; - - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - /* skip any that aren't queued */ - if (test_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - rcu_read_unlock(); - wake_up_process(rqstp->rq_task); - trace_svc_wake_up(rqstp->rq_task->pid); + rqstp = svc_pool_wake_idle_thread(pool); + if (!rqstp) { + set_bit(SP_TASK_PENDING, &pool->sp_flags); + smp_wmb(); + trace_svc_wake_up(0); return; } - rcu_read_unlock(); - /* No free entries available */ - set_bit(SP_TASK_PENDING, &pool->sp_flags); - smp_wmb(); - trace_svc_wake_up(0); + trace_svc_wake_up(rqstp->rq_task->pid); } EXPORT_SYMBOL_GPL(svc_wake_up); From patchwork Mon Jul 10 16:42:07 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13307389 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 50234EB64DA for ; Mon, 10 Jul 2023 16:42:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230016AbjGJQmL (ORCPT ); Mon, 10 Jul 2023 12:42:11 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45146 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230238AbjGJQmK (ORCPT ); Mon, 10 Jul 2023 12:42:10 -0400 
Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6455FC0 for ; Mon, 10 Jul 2023 09:42:09 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id EE9886101E for ; Mon, 10 Jul 2023 16:42:08 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id EDB35C433C7; Mon, 10 Jul 2023 16:42:07 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1689007328; bh=5X+RAkYJgFfBpaSONJGJGm077v85R/L6N3rVflk0/wI=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=peEBc46/NXXFP9Fu85ckpi0HnzI+QUSMfQYjKfDlfdxAo3ywfnhXZe81iZMwNhLrM kwg5gi0TtXF9vtjXJIJfTLDWbmyyoT4T+R8+tS6zz/zy9f89tEc7HqLlRQ+o1aLzhp L1j05vdsGq+cs7f3aKwf/rOFvtQxYoeHPM6Zw8rIQYCPGCQxW7LmAThnMGpN8IDL8C l7Y6O45gI6LOTUUJQUt+TvU/LgWy33R/QJM25Xwfqm9CI45OLcCjdrxuTwrt/lVoqy CJAYXIAbgm8o8zR3ktiH6qdnZrpMS1PkYUkoYWAn7qTdQt6Wa8LmQt1oDxYYiyMtA2 EMNN4fv+OqZNQ== Subject: [PATCH v3 2/9] SUNRPC: Report when no service thread is available. From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 10 Jul 2023 12:42:07 -0400 Message-ID: <168900732701.7514.15763852101603746862.stgit@manet.1015granger.net> In-Reply-To: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> References: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Count and record thread pool starvation. Administrators can take action by increasing thread count or decreasing workload. 
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 5 +++- include/trace/events/sunrpc.h | 49 ++++++++++++++++++++++++++++++++++------- net/sunrpc/svc.c | 9 +++++++- net/sunrpc/svc_xprt.c | 22 ++++++++++-------- 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dc2d90a655e2..fbfe6ea737c8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -22,7 +22,6 @@ #include /* - * * RPC service thread pool. * * Pool of threads and temporary sockets. Generally there is only @@ -42,6 +41,7 @@ struct svc_pool { struct percpu_counter sp_sockets_queued; struct percpu_counter sp_threads_woken; struct percpu_counter sp_threads_timedout; + struct percpu_counter sp_threads_starved; #define SP_TASK_PENDING (0) /* still work to do even if no * xprt is queued. */ @@ -427,7 +427,8 @@ int svc_register(const struct svc_serv *, struct net *, const int, void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); -struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool); +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, + struct svc_pool *pool); struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); const char * svc_proc_name(const struct svc_rqst *rqstp); diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 43711753616a..9b70fc1c698a 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1994,21 +1994,21 @@ TRACE_EVENT(svc_xprt_create_err, TRACE_EVENT(svc_xprt_enqueue, TP_PROTO( const struct svc_xprt *xprt, - const struct svc_rqst *rqst + const struct svc_rqst *wakee ), - TP_ARGS(xprt, rqst), + TP_ARGS(xprt, wakee), TP_STRUCT__entry( SVC_XPRT_ENDPOINT_FIELDS(xprt) - __field(int, pid) + __field(pid_t, pid) ), TP_fast_assign( SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt); - __entry->pid = rqst? 
rqst->rq_task->pid : 0; + __entry->pid = wakee->rq_task->pid; ), TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d", @@ -2039,6 +2039,39 @@ TRACE_EVENT(svc_xprt_dequeue, SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup) ); +#define show_svc_pool_flags(x) \ + __print_flags(x, "|", \ + { BIT(SP_TASK_PENDING), "TASK_PENDING" }, \ + { BIT(SP_CONGESTED), "CONGESTED" }) + +TRACE_EVENT(svc_pool_starved, + TP_PROTO( + const struct svc_serv *serv, + const struct svc_pool *pool + ), + + TP_ARGS(serv, pool), + + TP_STRUCT__entry( + __string(name, serv->sv_name) + __field(int, pool_id) + __field(unsigned int, nrthreads) + __field(unsigned long, flags) + ), + + TP_fast_assign( + __assign_str(name, serv->sv_name); + __entry->pool_id = pool->sp_id; + __entry->nrthreads = pool->sp_nrthreads; + __entry->flags = pool->sp_flags; + ), + + TP_printk("service=%s pool=%d flags=%s nrthreads=%u", + __get_str(name), __entry->pool_id, + show_svc_pool_flags(__entry->flags), __entry->nrthreads + ) +); + DECLARE_EVENT_CLASS(svc_xprt_event, TP_PROTO( const struct svc_xprt *xprt @@ -2109,16 +2142,16 @@ TRACE_EVENT(svc_xprt_accept, ); TRACE_EVENT(svc_wake_up, - TP_PROTO(int pid), + TP_PROTO(const struct svc_rqst *wakee), - TP_ARGS(pid), + TP_ARGS(wakee), TP_STRUCT__entry( - __field(int, pid) + __field(pid_t, pid) ), TP_fast_assign( - __entry->pid = pid; + __entry->pid = wakee->rq_task->pid; ), TP_printk("pid=%d", __entry->pid) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 05ee92b5fa1e..b79b8b41905d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -516,6 +516,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); + percpu_counter_init(&pool->sp_threads_starved, 0, GFP_KERNEL); } return serv; @@ -591,6 +592,7 @@ svc_destroy(struct kref *ref) 
percpu_counter_destroy(&pool->sp_sockets_queued); percpu_counter_destroy(&pool->sp_threads_woken); percpu_counter_destroy(&pool->sp_threads_timedout); + percpu_counter_destroy(&pool->sp_threads_starved); } kfree(serv->sv_pools); kfree(serv); @@ -691,6 +693,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) /** * svc_pool_wake_idle_thread - Awaken an idle thread in @pool + * @serv: RPC service * @pool: service thread pool * * Returns an idle service thread (now marked BUSY), or NULL @@ -698,7 +701,8 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) * thread and marking it BUSY is atomic with respect to other * calls to svc_pool_wake_idle_thread(). */ -struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool) +struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, + struct svc_pool *pool) { struct svc_rqst *rqstp; @@ -714,6 +718,9 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_pool *pool) return rqstp; } rcu_read_unlock(); + + trace_svc_pool_starved(serv, pool); + percpu_counter_inc(&pool->sp_threads_starved); return NULL; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 89302bf09b77..a1ed6fb69793 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -455,7 +455,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { - struct svc_rqst *rqstp; + struct svc_rqst *rqstp; struct svc_pool *pool; if (!svc_xprt_ready(xprt)) @@ -476,9 +476,11 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); spin_unlock_bh(&pool->sp_lock); - rqstp = svc_pool_wake_idle_thread(pool); - if (!rqstp) + rqstp = svc_pool_wake_idle_thread(xprt->xpt_server, pool); + if (!rqstp) { set_bit(SP_CONGESTED, &pool->sp_flags); + return; + } trace_svc_xprt_enqueue(xprt, rqstp); } @@ -584,17 +586,16 @@ static void svc_xprt_release(struct svc_rqst *rqstp) void svc_wake_up(struct svc_serv *serv) { 
struct svc_pool *pool = &serv->sv_pools[0]; - struct svc_rqst *rqstp; + struct svc_rqst *rqstp; - rqstp = svc_pool_wake_idle_thread(pool); + rqstp = svc_pool_wake_idle_thread(serv, pool); if (!rqstp) { set_bit(SP_TASK_PENDING, &pool->sp_flags); smp_wmb(); - trace_svc_wake_up(0); return; } - trace_svc_wake_up(rqstp->rq_task->pid); + trace_svc_wake_up(rqstp); } EXPORT_SYMBOL_GPL(svc_wake_up); @@ -1436,16 +1437,17 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) struct svc_pool *pool = p; if (p == SEQ_START_TOKEN) { - seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n"); + seq_puts(m, "# pool packets-arrived xprts-enqueued threads-woken threads-timedout starved\n"); return 0; } - seq_printf(m, "%u %llu %llu %llu %llu\n", + seq_printf(m, "%u %llu %llu %llu %llu %llu\n", pool->sp_id, percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_threads_woken), - percpu_counter_sum_positive(&pool->sp_threads_timedout)); + percpu_counter_sum_positive(&pool->sp_threads_timedout), + percpu_counter_sum_positive(&pool->sp_threads_starved)); return 0; } From patchwork Mon Jul 10 16:42:13 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13307390 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8517CEB64DA for ; Mon, 10 Jul 2023 16:42:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230245AbjGJQmR (ORCPT ); Mon, 10 Jul 2023 12:42:17 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45176 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230238AbjGJQmQ (ORCPT ); Mon, 10 Jul 2023 
12:42:16 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id F0B99AD for ; Mon, 10 Jul 2023 09:42:15 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 89BD761124 for ; Mon, 10 Jul 2023 16:42:15 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 81DFDC433C8; Mon, 10 Jul 2023 16:42:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1689007335; bh=6tgxkoGwbdPWA2kjPEvno7uq9F/lgC1ObR+QkLawEUc=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=ecKq7pFKDiFwIPqhhEETrV2RGneR7uRkLs3apDVw4XI343XTOPtW+PfX52pSKL6da EuDFxX1ZPiViJFifA+c0qBu1Dh9+qOPF3Pz2H5/xxHYNSP7GxzbezifAh8FAAVNsaC gpZ7cm6vmiXouooj/JxjSEqkbMT5sHyFF8I1Wl7HS9zHrRzlbMi70kQ3wCLOyFdUlK Fn2rz2urEKJ4lF0VGvfvpImzSGAADeSVebfqKDEQfY1bX9P2pnwJx0zKMfK/hQaN28 3Tij76G9os9MPoQDlT6CdV2RvLERizJXAVtzG0sE9JQ3u3koZZ9AmBIA1kUWsfFBTP 5nBxiT3Nssbsg== Subject: [PATCH v3 3/9] SUNRPC: Split the svc_xprt_dequeue tracepoint From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 10 Jul 2023 12:42:13 -0400 Message-ID: <168900733359.7514.13132763975249250052.stgit@manet.1015granger.net> In-Reply-To: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> References: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Distinguish between the case where new work was picked up just by looking at the transport queue versus when the thread was awoken. 
This gives us better visibility about how well-utilized the thread pool is. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 47 +++++++++++++++++++++++++++++++---------- net/sunrpc/svc_xprt.c | 9 +++++--- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 9b70fc1c698a..2e83887b58cd 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2015,34 +2015,57 @@ TRACE_EVENT(svc_xprt_enqueue, SVC_XPRT_ENDPOINT_VARARGS, __entry->pid) ); -TRACE_EVENT(svc_xprt_dequeue, +#define show_svc_pool_flags(x) \ + __print_flags(x, "|", \ + { BIT(SP_TASK_PENDING), "TASK_PENDING" }, \ + { BIT(SP_CONGESTED), "CONGESTED" }) + +DECLARE_EVENT_CLASS(svc_pool_scheduler_class, TP_PROTO( - const struct svc_rqst *rqst + const struct svc_rqst *rqstp ), - TP_ARGS(rqst), + TP_ARGS(rqstp), TP_STRUCT__entry( - SVC_XPRT_ENDPOINT_FIELDS(rqst->rq_xprt) + SVC_XPRT_ENDPOINT_FIELDS(rqstp->rq_xprt) + __string(name, rqstp->rq_server->sv_name) + __field(int, pool_id) + __field(unsigned int, nrthreads) + __field(unsigned long, pool_flags) __field(unsigned long, wakeup) ), TP_fast_assign( - SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt); + struct svc_pool *pool = rqstp->rq_pool; + SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqstp->rq_xprt); + __assign_str(name, rqstp->rq_server->sv_name); + __entry->pool_id = pool->sp_id; + __entry->nrthreads = pool->sp_nrthreads; + __entry->pool_flags = pool->sp_flags; __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), - rqst->rq_qtime)); + rqstp->rq_qtime)); ), - TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu", - SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT + " service=%s pool=%d pool_flags=%s nrthreads=%u wakeup-us=%lu", + SVC_XPRT_ENDPOINT_VARARGS, __get_str(name), __entry->pool_id, + show_svc_pool_flags(__entry->pool_flags), __entry->nrthreads, + __entry->wakeup + ) ); -#define show_svc_pool_flags(x) \ - __print_flags(x, "|", \ 
- { BIT(SP_TASK_PENDING), "TASK_PENDING" }, \ - { BIT(SP_CONGESTED), "CONGESTED" }) +#define DEFINE_SVC_POOL_SCHEDULER_EVENT(name) \ + DEFINE_EVENT(svc_pool_scheduler_class, svc_pool_##name, \ + TP_PROTO( \ + const struct svc_rqst *rqstp \ + ), \ + TP_ARGS(rqstp)) + +DEFINE_SVC_POOL_SCHEDULER_EVENT(polled); +DEFINE_SVC_POOL_SCHEDULER_EVENT(awoken); TRACE_EVENT(svc_pool_starved, TP_PROTO( diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a1ed6fb69793..7ee095d03996 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -744,8 +744,10 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) WARN_ON_ONCE(rqstp->rq_xprt); rqstp->rq_xprt = svc_xprt_dequeue(pool); - if (rqstp->rq_xprt) + if (rqstp->rq_xprt) { + trace_svc_pool_polled(rqstp); goto out_found; + } /* * We have to be able to interrupt this wait @@ -767,8 +769,10 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) set_bit(RQ_BUSY, &rqstp->rq_flags); smp_mb__after_atomic(); rqstp->rq_xprt = svc_xprt_dequeue(pool); - if (rqstp->rq_xprt) + if (rqstp->rq_xprt) { + trace_svc_pool_awoken(rqstp); goto out_found; + } if (!time_left) percpu_counter_inc(&pool->sp_threads_timedout); @@ -784,7 +788,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) rqstp->rq_chandle.thread_wait = 5*HZ; else rqstp->rq_chandle.thread_wait = 1*HZ; - trace_svc_xprt_dequeue(rqstp); return rqstp->rq_xprt; } From patchwork Mon Jul 10 16:42:20 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13307391 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 621CCEB64D9 for ; Mon, 10 Jul 2023 16:42:27 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org 
via listexpand id S230252AbjGJQm0 (ORCPT ); Mon, 10 Jul 2023 12:42:26 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45252 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230269AbjGJQmX (ORCPT ); Mon, 10 Jul 2023 12:42:23 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9DC8712A for ; Mon, 10 Jul 2023 09:42:22 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 27D386112B for ; Mon, 10 Jul 2023 16:42:22 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 27298C433C8; Mon, 10 Jul 2023 16:42:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1689007341; bh=6l5Jt33f9RO3uwq4Kya+V7DLcbvhb749z5nxtKsxQEc=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=TbMqvdXeRaFAGGN0tFIU+wZ3rlEFGH5JWr1/6EDHcwKz7SFE3pbN+g/cNTaPzHem8 zVc/r0tNrk6sJdakV/Jypji7s7n0wI7d1Ovu9oIwsOngy1+X9XCp0UXh+utWYgl8EL soZ4sOaarpgFbzs1PXbLQzcu10AlX4dKvbMDVFGUxihp7awy6V+P7TVD8clvpVw4iG IWqttMqGMMMZ3Mc1DPggqx3/T5iJ0GncspzpKSpfbreputI9GrvtmsV90H0tkB3JUg pjMzPNJWWQKFZd2uQaS/zRFiD6alg7nltne+PszskTSIPG3lRI4iYZDy4Eh57o/jKj KV/oPOgoKMB4Q== Subject: [PATCH v3 4/9] SUNRPC: Count ingress RPC messages per svc_pool From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 10 Jul 2023 12:42:20 -0400 Message-ID: <168900734016.7514.3760096764861612619.stgit@manet.1015granger.net> In-Reply-To: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> References: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 
1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever svc_xprt_enqueue() can be costly, since it involves selecting and waking up a process. More than one enqueue is done per incoming RPC. For example, svc_data_ready() enqueues, and so does svc_xprt_receive(). Also, if an RPC message requires more than one call to ->recvfrom() to receive it fully, each one of those calls does an enqueue. To get a sense of the average number of transport enqueue operations needed to process an incoming RPC message, re-use the "packets" pool stat. Track the number of complete RPC messages processed by each thread pool. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 2 ++ net/sunrpc/svc_xprt.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fbfe6ea737c8..74ea13270679 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -38,6 +38,7 @@ struct svc_pool { struct list_head sp_all_threads; /* all server threads */ /* statistics on pool operation */ + struct percpu_counter sp_messages_arrived; struct percpu_counter sp_sockets_queued; struct percpu_counter sp_threads_woken; struct percpu_counter sp_threads_timedout; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b79b8b41905d..88b7b5fb6d75 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -513,6 +513,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, INIT_LIST_HEAD(&pool->sp_all_threads); spin_lock_init(&pool->sp_lock); + percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); @@ -589,6 +590,7 @@ svc_destroy(struct kref *ref) for (i = 0; i < serv->sv_nrpools; i++) { struct svc_pool *pool = &serv->sv_pools[i]; + 
percpu_counter_destroy(&pool->sp_messages_arrived); percpu_counter_destroy(&pool->sp_sockets_queued); percpu_counter_destroy(&pool->sp_threads_woken); percpu_counter_destroy(&pool->sp_threads_timedout); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 7ee095d03996..ecbccf0d89b9 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -897,6 +897,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; + percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived); rqstp->rq_stime = ktime_get(); return len; out_release: @@ -1446,7 +1447,7 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) seq_printf(m, "%u %llu %llu %llu %llu %llu\n", pool->sp_id, - percpu_counter_sum_positive(&pool->sp_sockets_queued), + percpu_counter_sum_positive(&pool->sp_messages_arrived), percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_threads_woken), percpu_counter_sum_positive(&pool->sp_threads_timedout), From patchwork Mon Jul 10 16:42:26 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13307392 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1B60FEB64D9 for ; Mon, 10 Jul 2023 16:42:35 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230296AbjGJQmd (ORCPT ); Mon, 10 Jul 2023 12:42:33 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45402 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230281AbjGJQma (ORCPT ); Mon, 10 Jul 2023 12:42:30 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4CC0213D for ; 
Mon, 10 Jul 2023 09:42:29 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id B21356112A for ; Mon, 10 Jul 2023 16:42:28 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id B09CBC433C7; Mon, 10 Jul 2023 16:42:27 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1689007348; bh=0UTojvHk3XK1psFQA6O+HJqio8NK7M/LxGZkI05qJUI=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=fyMH1Ad6GzWeAYEV8qki5VVo9qVDrpazHjO1Yev4KPhzao2qrMemW+po2HlqQS91O kfs0wCCBM1F0W6cTjhY/m60fI8l0BcYh6AxcE3m1KlRoMB60f8s1C72dZOsYK1IzXP R6YCezuElY5gYjhUtlNt6Gps9VrxoLb6QoBBKBuKz9dvtPvkdm04oFcra9Yn/9vYGZ kkDr8enaLyp2/rz2fTD2VssZr1tl0AE6k/jzb/2AxU3Ya/mWsz6vnabaDrLNKYhlnE noMXb+b5tKjs95XltGplGH/U+X03pvXsQ9np7cpBBLn2k1ijTxRRaURmHFeNcENN+n iSfg65cMnGVEQ== Subject: [PATCH v3 5/9] SUNRPC: Count pool threads that were awoken but found no work to do From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 10 Jul 2023 12:42:26 -0400 Message-ID: <168900734678.7514.887270657845753276.stgit@manet.1015granger.net> In-Reply-To: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> References: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Measure a source of thread scheduling inefficiency -- count threads that were awoken but found that the transport queue had already been emptied. 
An empty transport queue is possible when threads that run between the wake_up_process() call and the woken thread returning from the scheduler have pulled all remaining work off the transport queue using the first svc_xprt_dequeue() in svc_get_next_xprt(). Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 2 ++ net/sunrpc/svc_xprt.c | 7 ++++--- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 74ea13270679..9dd3b16cc4c2 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -43,6 +43,7 @@ struct svc_pool { struct percpu_counter sp_threads_woken; struct percpu_counter sp_threads_timedout; struct percpu_counter sp_threads_starved; + struct percpu_counter sp_threads_no_work; #define SP_TASK_PENDING (0) /* still work to do even if no * xprt is queued. */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 88b7b5fb6d75..b7a02309ecb1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -518,6 +518,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_starved, 0, GFP_KERNEL); + percpu_counter_init(&pool->sp_threads_no_work, 0, GFP_KERNEL); } return serv; @@ -595,6 +596,7 @@ svc_destroy(struct kref *ref) percpu_counter_destroy(&pool->sp_threads_woken); percpu_counter_destroy(&pool->sp_threads_timedout); percpu_counter_destroy(&pool->sp_threads_starved); + percpu_counter_destroy(&pool->sp_threads_no_work); } kfree(serv->sv_pools); kfree(serv); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ecbccf0d89b9..6c2a702aa469 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -776,9 +776,9 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (!time_left) percpu_counter_inc(&pool->sp_threads_timedout); - 
if (signalled() || kthread_should_stop()) return ERR_PTR(-EINTR); + percpu_counter_inc(&pool->sp_threads_no_work); return ERR_PTR(-EAGAIN); out_found: /* Normally we will wait up to 5 seconds for any required @@ -1445,13 +1445,14 @@ static int svc_pool_stats_show(struct seq_file *m, void *p) return 0; } - seq_printf(m, "%u %llu %llu %llu %llu %llu\n", + seq_printf(m, "%u %llu %llu %llu %llu %llu %llu\n", pool->sp_id, percpu_counter_sum_positive(&pool->sp_messages_arrived), percpu_counter_sum_positive(&pool->sp_sockets_queued), percpu_counter_sum_positive(&pool->sp_threads_woken), percpu_counter_sum_positive(&pool->sp_threads_timedout), - percpu_counter_sum_positive(&pool->sp_threads_starved)); + percpu_counter_sum_positive(&pool->sp_threads_starved), + percpu_counter_sum_positive(&pool->sp_threads_no_work)); return 0; } From patchwork Mon Jul 10 16:42:33 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13307393 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E1622EB64D9 for ; Mon, 10 Jul 2023 16:42:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230281AbjGJQmk (ORCPT ); Mon, 10 Jul 2023 12:42:40 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45566 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230264AbjGJQmj (ORCPT ); Mon, 10 Jul 2023 12:42:39 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B3981180 for ; Mon, 10 Jul 2023 09:42:35 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 
server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 502066101E for ; Mon, 10 Jul 2023 16:42:35 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 4C2CCC433C7; Mon, 10 Jul 2023 16:42:34 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1689007354; bh=xe3T/z5HwAy1RnUi4f9GXor1QkLrb5tcWJETBoFq88I=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=lcgXl0lHYX2Ue8rvjO8Re1erqlxLNZrsGRkMRyvFRYV4ZpOHjTgBV6AASl6t7+md0 9RLxkA0RFImwDfkbBpfEv8TIncFv/bNU4O85ie+eaNC3kbP9okwVeKRtLnHcn6mfbD gl5aT0fwRLynRtjY5AN+99ReLPHQaKgBWooh4Zq+HOvaaun7hLOAYNFdvz22Qfbzu6 hb38d7PonlINBBtFEWIu/WfFIoJG051KLemvztB2RGVcQ8JiGi+yq17OLICnFv5Oqc DqPZfXqh/RmC+zIaKPBgEq06sW8fJv0AW8qqXc+nzhr9oAK3TNf7mUAvVhXKb7wpTs JAstn15YH22iA== Subject: [PATCH v3 6/9] SUNRPC: Clean up svc_set_num_threads From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 10 Jul 2023 12:42:33 -0400 Message-ID: <168900735334.7514.10614943419985083955.stgit@manet.1015granger.net> In-Reply-To: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> References: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Document the API contract and remove stale or obvious comments. 
Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 60 +++++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b7a02309ecb1..b02a672aaada 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -728,23 +728,14 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, return NULL; } -/* - * Choose a pool in which to create a new thread, for svc_set_num_threads - */ -static inline struct svc_pool * -choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +static struct svc_pool * +svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) { - if (pool != NULL) - return pool; - - return &serv->sv_pools[(*state)++ % serv->sv_nrpools]; + return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools]; } -/* - * Choose a thread to kill, for svc_set_num_threads - */ -static inline struct task_struct * -choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +static struct task_struct * +svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) { unsigned int i; struct task_struct *task = NULL; @@ -752,7 +743,6 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) if (pool != NULL) { spin_lock_bh(&pool->sp_lock); } else { - /* choose a pool in round-robin fashion */ for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; spin_lock_bh(&pool->sp_lock); @@ -767,21 +757,15 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) if (!list_empty(&pool->sp_all_threads)) { struct svc_rqst *rqstp; - /* - * Remove from the pool->sp_all_threads list - * so we don't try to kill it again. 
- */ rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); set_bit(RQ_VICTIM, &rqstp->rq_flags); list_del_rcu(&rqstp->rq_all); task = rqstp->rq_task; } spin_unlock_bh(&pool->sp_lock); - return task; } -/* create new threads */ static int svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -793,13 +777,12 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) do { nrservs--; - chosen_pool = choose_pool(serv, pool, &state); - + chosen_pool = svc_pool_next(serv, pool, &state); node = svc_pool_map_get_node(chosen_pool->sp_id); + rqstp = svc_prepare_thread(serv, chosen_pool, node); if (IS_ERR(rqstp)) return PTR_ERR(rqstp); - task = kthread_create_on_node(serv->sv_threadfn, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { @@ -818,15 +801,6 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } -/* - * Create or destroy enough new threads to make the number - * of threads the given number. If `pool' is non-NULL, applies - * only to threads in that pool, otherwise round-robins between - * all pools. Caller must ensure that mutual exclusion between this and - * server startup or shutdown. 
- */ - -/* destroy old threads */ static int svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { @@ -834,9 +808,8 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) struct task_struct *task; unsigned int state = serv->sv_nrthreads-1; - /* destroy old threads */ do { - task = choose_victim(serv, pool, &state); + task = svc_pool_victim(serv, pool, &state); if (task == NULL) break; rqstp = kthread_data(task); @@ -848,6 +821,23 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) return 0; } +/** + * svc_set_num_threads - adjust number of threads per RPC service + * @serv: RPC service to adjust + * @pool: Specific pool from which to choose threads, or NULL + * @nrservs: New number of threads for @serv (0 or less means kill all threads) + * + * Create or destroy threads to make the number of threads for @serv the + * given number. If @pool is non-NULL, change only threads in that pool; + * otherwise, round-robin between all pools for @serv. @serv's + * sv_nrthreads is adjusted for each thread created or destroyed. + * + * Caller must ensure mutual exclusion between this and server startup or + * shutdown. + * + * Returns zero on success or a negative errno if an error occurred while + * starting a thread. 
+ */ int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { From patchwork Mon Jul 10 16:42:39 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13307394 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D1108EB64DA for ; Mon, 10 Jul 2023 16:42:44 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230274AbjGJQmn (ORCPT ); Mon, 10 Jul 2023 12:42:43 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45620 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230080AbjGJQmn (ORCPT ); Mon, 10 Jul 2023 12:42:43 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 42328F2 for ; Mon, 10 Jul 2023 09:42:42 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id CBE5461130 for ; Mon, 10 Jul 2023 16:42:41 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id CB499C433C8; Mon, 10 Jul 2023 16:42:40 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1689007361; bh=bBt6O/veulsN6GYsmZXvg5NrlsLaQXmf+NnQFVN+eMI=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=NwrmIjgTxVKP3WKIHVMJZM45cZyJSzT6bqI4eauf12aNc0tg+o/Oab80NHH+RYW9K /NgMdVscdr0NRN6Yo0uwObJu3HNhQRvQUKUAh7PFoc0UFYeMmYJIRRAwhpwwZ66oC9 9YP8wvHBmOwacCMq6c+Wbal1ORBgvByylM7devaljoBudU0z+MxUmtShuGXJMQYobw 
qLfClaKXw/D/9RXSahyVa62LbgvvEYsVWZiVHN61C1twbRd8BI8k1ad7Yi3lGebTWc fGqkvFUis6RSk7Ifc1x42KBsVnupL4JUCL/Bna4ljPlOCGoWqdJW/TYVPpfi1Ea7AK d0NKeSbeZCUCA== Subject: [PATCH v3 7/9] SUNRPC: Replace dprintk() call site in __svc_create() From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 10 Jul 2023 12:42:39 -0400 Message-ID: <168900735994.7514.15744992636137059327.stgit@manet.1015granger.net> In-Reply-To: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> References: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever Done as part of converting SunRPC observability from printk-style to tracepoints. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 23 +++++++++++++++++++++++ net/sunrpc/svc.c | 5 ++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2e83887b58cd..60c8e03268d4 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1918,6 +1918,29 @@ TRACE_EVENT(svc_stats_latency, __get_str(procedure), __entry->execute) ); +TRACE_EVENT(svc_pool_init, + TP_PROTO( + const struct svc_serv *serv, + const struct svc_pool *pool + ), + + TP_ARGS(serv, pool), + + TP_STRUCT__entry( + __string(name, serv->sv_name) + __field(int, pool_id) + ), + + TP_fast_assign( + __assign_str(name, serv->sv_name); + __entry->pool_id = pool->sp_id; + ), + + TP_printk("service=%s pool=%d", + __get_str(name), __entry->pool_id + ) +); + #define show_svc_xprt_flags(flags) \ __print_flags(flags, "|", \ { BIT(XPT_BUSY), "BUSY" }, \ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b02a672aaada..ad29df00b454 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -505,9 +505,6 @@ __svc_create(struct svc_program *prog, unsigned 
int bufsize, int npools, for (i = 0; i < serv->sv_nrpools; i++) { struct svc_pool *pool = &serv->sv_pools[i]; - dprintk("svc: initialising pool %u for %s\n", - i, serv->sv_name); - pool->sp_id = i; INIT_LIST_HEAD(&pool->sp_sockets); INIT_LIST_HEAD(&pool->sp_all_threads); @@ -519,6 +516,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_starved, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_threads_no_work, 0, GFP_KERNEL); + + trace_svc_pool_init(serv, pool); } return serv; From patchwork Mon Jul 10 16:42:46 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13307395 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 85250EB64DA for ; Mon, 10 Jul 2023 16:42:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230080AbjGJQmw (ORCPT ); Mon, 10 Jul 2023 12:42:52 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45702 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230264AbjGJQmv (ORCPT ); Mon, 10 Jul 2023 12:42:51 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C72C712A for ; Mon, 10 Jul 2023 09:42:48 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 5A6B76112D for ; Mon, 10 Jul 2023 16:42:48 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 
57C27C433C7; Mon, 10 Jul 2023 16:42:47 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1689007367; bh=dqk/W/6WaAunlVElBTT0JtgGPudT/Pnhi+ChrsuAQJ0=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=K/6ss7hc8hnyykQzmqf0lh/5qXSN3iNPf41fa4yHUO9rWdLj2a5Ynv7yijokt2w/s tsa5X/HbxlgK30u2aGDO/E9hiwqpzayNjrAhqoVHCv6HTlk2HAkd/m1OFGPnZZCyJ/ 7uSXy+YvtgIJ2joYRERJdG5rkdeeInSUk0P8R6SeukUqNlx3PLUtazCnM//iP9hUIB OAmr8p5c4Sg4iFj0+1lRWMtYRPzssvax7C7+j6PjCjROSVOtckITdHGdCanhdkPDjZ 7Cryyld/zq+3729YFzcrLBp6rsuVI459MlpxDXjFGwIaYCrZZij03RkgFPBdRjrYcQ ccJO22NwvJFlw== Subject: [PATCH v3 8/9] SUNRPC: Replace sp_all_threads with an xarray From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 10 Jul 2023 12:42:46 -0400 Message-ID: <168900736644.7514.16807799597793601214.stgit@manet.1015granger.net> In-Reply-To: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> References: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever We want a thread lookup operation that can be done with RCU only, but also we want to avoid the linked-list walk, which does not scale well in the number of pool threads. This patch splits out the use of the sp_lock to protect the set of threads. Svc thread information is now protected by the xarray's lock (when making thread count changes) and the RCU read lock (when only looking up a thread). Since thread count changes are done only via the nfsd filesystem API, which runs only in process context, we can safely dispense with the use of a bottom-half-disabled lock. 
Signed-off-by: Chuck Lever --- fs/nfsd/nfssvc.c | 3 +- include/linux/sunrpc/svc.h | 11 +++---- include/trace/events/sunrpc.h | 47 ++++++++++++++++++++++++++++- net/sunrpc/svc.c | 67 +++++++++++++++++++++++++---------------- net/sunrpc/svc_xprt.c | 2 + 5 files changed, 94 insertions(+), 36 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 2154fa63c5f2..d42b2a40c93c 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -62,8 +62,7 @@ static __be32 nfsd_init_request(struct svc_rqst *, * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless * nn->keep_active is set). That number of nfsd threads must - * exist and each must be listed in ->sp_all_threads in some entry of - * ->sv_pools[]. + * exist and each must be listed in some entry of ->sv_pools[]. * * Each active thread holds a counted reference on nn->nfsd_serv, as does * the nn->keep_active flag and various transient calls to svc_get(). diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 9dd3b16cc4c2..86377506a514 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -32,10 +32,10 @@ */ struct svc_pool { unsigned int sp_id; /* pool id; also node id on NUMA */ - spinlock_t sp_lock; /* protects all fields */ + spinlock_t sp_lock; /* protects sp_sockets */ struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ - struct list_head sp_all_threads; /* all server threads */ + struct xarray sp_thread_xa; /* statistics on pool operation */ struct percpu_counter sp_messages_arrived; @@ -196,7 +196,6 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); * processed. 
*/ struct svc_rqst { - struct list_head rq_all; /* all threads list */ struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ struct svc_xprt * rq_xprt; /* transport ptr */ @@ -241,10 +240,10 @@ struct svc_rqst { #define RQ_SPLICE_OK (4) /* turned off in gss privacy * to prevent encrypting page * cache pages */ -#define RQ_VICTIM (5) /* about to be shut down */ -#define RQ_BUSY (6) /* request is busy */ -#define RQ_DATA (7) /* request has data */ +#define RQ_BUSY (5) /* request is busy */ +#define RQ_DATA (6) /* request has data */ unsigned long rq_flags; /* flags field */ + u32 rq_thread_id; /* xarray index */ ktime_t rq_qtime; /* enqueue time */ void * rq_argp; /* decoded arguments */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 60c8e03268d4..ea43c6059bdb 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1676,7 +1676,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto); svc_rqst_flag(USEDEFERRAL) \ svc_rqst_flag(DROPME) \ svc_rqst_flag(SPLICE_OK) \ - svc_rqst_flag(VICTIM) \ svc_rqst_flag(BUSY) \ svc_rqst_flag_end(DATA) @@ -2118,6 +2117,52 @@ TRACE_EVENT(svc_pool_starved, ) ); +DECLARE_EVENT_CLASS(svc_thread_lifetime_class, + TP_PROTO( + const struct svc_serv *serv, + const struct svc_pool *pool, + const struct svc_rqst *rqstp + ), + + TP_ARGS(serv, pool, rqstp), + + TP_STRUCT__entry( + __string(name, serv->sv_name) + __field(int, pool_id) + __field(unsigned int, nrthreads) + __field(unsigned long, pool_flags) + __field(u32, thread_id) + __field(const void *, rqstp) + ), + + TP_fast_assign( + __assign_str(name, serv->sv_name); + __entry->pool_id = pool->sp_id; + __entry->nrthreads = pool->sp_nrthreads; + __entry->pool_flags = pool->sp_flags; + __entry->thread_id = rqstp->rq_thread_id; + __entry->rqstp = rqstp; + ), + + TP_printk("service=%s pool=%d pool_flags=%s nrthreads=%u thread_id=%u", + __get_str(name), __entry->pool_id, + show_svc_pool_flags(__entry->pool_flags), + __entry->nrthreads, __entry->thread_id 
+ ) +); + +#define DEFINE_SVC_THREAD_LIFETIME_EVENT(name) \ + DEFINE_EVENT(svc_thread_lifetime_class, svc_pool_##name, \ + TP_PROTO( \ + const struct svc_serv *serv, \ + const struct svc_pool *pool, \ + const struct svc_rqst *rqstp \ + ), \ + TP_ARGS(serv, pool, rqstp)) + +DEFINE_SVC_THREAD_LIFETIME_EVENT(thread_init); +DEFINE_SVC_THREAD_LIFETIME_EVENT(thread_exit); + DECLARE_EVENT_CLASS(svc_xprt_event, TP_PROTO( const struct svc_xprt *xprt diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index ad29df00b454..109d7f047385 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -507,8 +507,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, pool->sp_id = i; INIT_LIST_HEAD(&pool->sp_sockets); - INIT_LIST_HEAD(&pool->sp_all_threads); spin_lock_init(&pool->sp_lock); + xa_init_flags(&pool->sp_thread_xa, XA_FLAGS_ALLOC); percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); @@ -596,6 +596,8 @@ svc_destroy(struct kref *ref) percpu_counter_destroy(&pool->sp_threads_timedout); percpu_counter_destroy(&pool->sp_threads_starved); percpu_counter_destroy(&pool->sp_threads_no_work); + + xa_destroy(&pool->sp_thread_xa); } kfree(serv->sv_pools); kfree(serv); @@ -676,7 +678,11 @@ EXPORT_SYMBOL_GPL(svc_rqst_alloc); static struct svc_rqst * svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) { + struct xa_limit limit = { + .max = U32_MAX, + }; struct svc_rqst *rqstp; + int ret; rqstp = svc_rqst_alloc(serv, pool, node); if (!rqstp) @@ -687,11 +693,21 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) serv->sv_nrthreads += 1; spin_unlock_bh(&serv->sv_lock); - spin_lock_bh(&pool->sp_lock); + xa_lock(&pool->sp_thread_xa); + ret = __xa_alloc(&pool->sp_thread_xa, &rqstp->rq_thread_id, rqstp, + limit, GFP_KERNEL); + if (ret) { + xa_unlock(&pool->sp_thread_xa); + goto out_free; + } pool->sp_nrthreads++; - list_add_rcu(&rqstp->rq_all, 
&pool->sp_all_threads); - spin_unlock_bh(&pool->sp_lock); + xa_unlock(&pool->sp_thread_xa); + trace_svc_pool_thread_init(serv, pool, rqstp); return rqstp; + +out_free: + svc_rqst_free(rqstp); + return ERR_PTR(ret); } /** @@ -708,19 +724,17 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, struct svc_pool *pool) { struct svc_rqst *rqstp; + unsigned long index; - rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { + xa_for_each(&pool->sp_thread_xa, index, rqstp) { if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) continue; - rcu_read_unlock(); WRITE_ONCE(rqstp->rq_qtime, ktime_get()); wake_up_process(rqstp->rq_task); percpu_counter_inc(&pool->sp_threads_woken); return rqstp; } - rcu_read_unlock(); trace_svc_pool_starved(serv, pool); percpu_counter_inc(&pool->sp_threads_starved); @@ -736,32 +750,33 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) static struct task_struct * svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) { - unsigned int i; struct task_struct *task = NULL; + struct svc_rqst *rqstp; + unsigned int i; if (pool != NULL) { - spin_lock_bh(&pool->sp_lock); + xa_lock(&pool->sp_thread_xa); + if (!pool->sp_nrthreads) + goto out; } else { for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; - spin_lock_bh(&pool->sp_lock); - if (!list_empty(&pool->sp_all_threads)) + xa_lock(&pool->sp_thread_xa); + if (pool->sp_nrthreads) goto found_pool; - spin_unlock_bh(&pool->sp_lock); + xa_unlock(&pool->sp_thread_xa); } return NULL; } found_pool: - if (!list_empty(&pool->sp_all_threads)) { - struct svc_rqst *rqstp; - - rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); - set_bit(RQ_VICTIM, &rqstp->rq_flags); - list_del_rcu(&rqstp->rq_all); + rqstp = xa_load(&pool->sp_thread_xa, pool->sp_nrthreads - 1); + if (rqstp) { + __xa_erase(&pool->sp_thread_xa, rqstp->rq_thread_id); task = rqstp->rq_task; } 
- spin_unlock_bh(&pool->sp_lock); +out: + xa_unlock(&pool->sp_thread_xa); return task; } @@ -843,9 +858,9 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) if (pool == NULL) { nrservs -= serv->sv_nrthreads; } else { - spin_lock_bh(&pool->sp_lock); + xa_lock(&pool->sp_thread_xa); nrservs -= pool->sp_nrthreads; - spin_unlock_bh(&pool->sp_lock); + xa_unlock(&pool->sp_thread_xa); } if (nrservs > 0) @@ -932,11 +947,11 @@ svc_exit_thread(struct svc_rqst *rqstp) struct svc_serv *serv = rqstp->rq_server; struct svc_pool *pool = rqstp->rq_pool; - spin_lock_bh(&pool->sp_lock); + xa_lock(&pool->sp_thread_xa); pool->sp_nrthreads--; - if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags)) - list_del_rcu(&rqstp->rq_all); - spin_unlock_bh(&pool->sp_lock); + __xa_erase(&pool->sp_thread_xa, rqstp->rq_thread_id); + xa_unlock(&pool->sp_thread_xa); + trace_svc_pool_thread_exit(serv, pool, rqstp); spin_lock_bh(&serv->sv_lock); serv->sv_nrthreads -= 1; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 6c2a702aa469..db40f771b60a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -46,7 +46,7 @@ static LIST_HEAD(svc_xprt_class_list); /* SMP locking strategy: * - * svc_pool->sp_lock protects most of the fields of that pool. + * svc_pool->sp_lock protects sp_sockets. * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. * when both need to be taken (rare), svc_serv->sv_lock is first. * The "service mutex" protects svc_serv->sv_nrthread. 
From patchwork Mon Jul 10 16:42:52 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13307396 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id D047CEB64DA for ; Mon, 10 Jul 2023 16:43:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230289AbjGJQnD (ORCPT ); Mon, 10 Jul 2023 12:43:03 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45762 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230300AbjGJQm4 (ORCPT ); Mon, 10 Jul 2023 12:42:56 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 586A5120 for ; Mon, 10 Jul 2023 09:42:55 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id E26176112E for ; Mon, 10 Jul 2023 16:42:54 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id DBB3BC433C7; Mon, 10 Jul 2023 16:42:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1689007374; bh=JbHD/rU7Mj0Td0pbaqlWP5W3I3wsehbQq5EqN6GbxDk=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=gvi9gG5FJm6xKvEkfeVBZJ+wzyGU2knogKx2gJZcThLYyvlhsHPil4OiSjfvTqveq 01pMGVwev+lRniEIrDFP39wQru+BSfcphe8Ad9Ayl7EXJefY93wz/HJ9lHpTrZ2+R6 sgMa/NNef3ryuOaIJvGWk41LZjraQZLyNN59f5SwZcX9WYPjJ74MHQpTWRk7JymnN1 FxZ3JWiw/TXvVKfcafW8YKzFMq85nq0PcyIFA33zgbBFgTHIztz7S9Vj3FvzWaQQ4P CxyLlXUHNowTreAJ3tGwRc72AHHmBS3uc8UMJU2V7EZ1Pj2/5tnMdiU3/yXxtw9rSW 
zmmQv7rCPBvvQ== Subject: [PATCH v3 9/9] SUNRPC: Convert RQ_BUSY into a per-pool bitmap From: Chuck Lever To: linux-nfs@vger.kernel.org Cc: Chuck Lever , lorenzo@kernel.org, neilb@suse.de, jlayton@redhat.com, david@fromorbit.com Date: Mon, 10 Jul 2023 12:42:52 -0400 Message-ID: <168900737297.7514.333293207540036098.stgit@manet.1015granger.net> In-Reply-To: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> References: <168900729243.7514.15141312295052254929.stgit@manet.1015granger.net> User-Agent: StGit/1.5 MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-nfs@vger.kernel.org From: Chuck Lever I've noticed that client-observed server request latency goes up simply when the nfsd thread count is increased. Walking the whole set of pool threads is memory-inefficient. On a busy server with many threads, enqueuing a transport will visit all the threads in the pool quite frequently. This also pulls in the cache lines for some hot fields in each svc_rqst (namely, rq_flags). The svc_xprt_enqueue() call that concerns me most is the one in svc_rdma_wc_receive(), which is single-threaded per CQ. Slowing down completion handling limits the total throughput per RDMA connection. Instead, set up a busy bitmap and use for_each_clear_bit(), which should work the same way as RQ_BUSY but will touch only the cache lines that the bitmap is in. Stick with atomic bit operations to avoid taking a spinlock during the search. 
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 6 ++++-- include/trace/events/sunrpc.h | 1 - net/sunrpc/svc.c | 24 +++++++++++++++++++----- net/sunrpc/svc_xprt.c | 26 ++++++++++++++++++++------ 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 86377506a514..6669f3eb9ed4 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -35,6 +35,7 @@ struct svc_pool { spinlock_t sp_lock; /* protects sp_sockets */ struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ + unsigned long *sp_busy_map; /* running threads */ struct xarray sp_thread_xa; /* statistics on pool operation */ @@ -191,6 +192,8 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); #define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ + 2 + 1) +#define RPCSVC_MAXPOOLTHREADS (4096) + /* * The context of a single thread, including the request currently being * processed. 
@@ -240,8 +243,7 @@ struct svc_rqst { #define RQ_SPLICE_OK (4) /* turned off in gss privacy * to prevent encrypting page * cache pages */ -#define RQ_BUSY (5) /* request is busy */ -#define RQ_DATA (6) /* request has data */ +#define RQ_DATA (5) /* request has data */ unsigned long rq_flags; /* flags field */ u32 rq_thread_id; /* xarray index */ ktime_t rq_qtime; /* enqueue time */ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ea43c6059bdb..c07824a254bf 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1676,7 +1676,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto); svc_rqst_flag(USEDEFERRAL) \ svc_rqst_flag(DROPME) \ svc_rqst_flag(SPLICE_OK) \ - svc_rqst_flag(BUSY) \ svc_rqst_flag_end(DATA) #undef svc_rqst_flag diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 109d7f047385..f6305b66fd28 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -509,6 +509,12 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, INIT_LIST_HEAD(&pool->sp_sockets); spin_lock_init(&pool->sp_lock); xa_init_flags(&pool->sp_thread_xa, XA_FLAGS_ALLOC); + pool->sp_busy_map = + bitmap_alloc_node(RPCSVC_MAXPOOLTHREADS, GFP_KERNEL, + svc_pool_map_get_node(i)); + if (!pool->sp_busy_map) + return NULL; + bitmap_fill(pool->sp_busy_map, RPCSVC_MAXPOOLTHREADS); percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); @@ -598,6 +604,8 @@ svc_destroy(struct kref *ref) percpu_counter_destroy(&pool->sp_threads_no_work); xa_destroy(&pool->sp_thread_xa); + bitmap_free(pool->sp_busy_map); + pool->sp_busy_map = NULL; } kfree(serv->sv_pools); kfree(serv); @@ -649,7 +657,6 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) folio_batch_init(&rqstp->rq_fbatch); - __set_bit(RQ_BUSY, &rqstp->rq_flags); rqstp->rq_server = serv; rqstp->rq_pool = pool; @@ -679,7 +686,7 @@ static struct svc_rqst * svc_prepare_thread(struct svc_serv *serv, struct 
svc_pool *pool, int node) { struct xa_limit limit = { - .max = U32_MAX, + .max = RPCSVC_MAXPOOLTHREADS, }; struct svc_rqst *rqstp; int ret; @@ -724,12 +731,19 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv, struct svc_pool *pool) { struct svc_rqst *rqstp; - unsigned long index; + unsigned long bit; - xa_for_each(&pool->sp_thread_xa, index, rqstp) { - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) + /* Check the pool's idle bitmap locklessly so that multiple + * idle searches can proceed concurrently. + */ + for_each_clear_bit(bit, pool->sp_busy_map, pool->sp_nrthreads) { + if (test_and_set_bit(bit, pool->sp_busy_map)) continue; + rqstp = xa_load(&pool->sp_thread_xa, bit); + if (!rqstp) + break; + WRITE_ONCE(rqstp->rq_qtime, ktime_get()); wake_up_process(rqstp->rq_task); percpu_counter_inc(&pool->sp_threads_woken); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index db40f771b60a..f9c9babe0cba 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -735,6 +735,21 @@ rqst_should_sleep(struct svc_rqst *rqstp) return true; } +static void svc_pool_thread_mark_idle(struct svc_pool *pool, + struct svc_rqst *rqstp) +{ + clear_bit_unlock(rqstp->rq_thread_id, pool->sp_busy_map); +} + +/* + * Note: If we were awoken, then this rqstp has already been marked busy. 
+ */ +static void svc_pool_thread_mark_busy(struct svc_pool *pool, + struct svc_rqst *rqstp) +{ + test_and_set_bit_lock(rqstp->rq_thread_id, pool->sp_busy_map); +} + static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { struct svc_pool *pool = rqstp->rq_pool; @@ -756,18 +771,17 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) set_current_state(TASK_INTERRUPTIBLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); - clear_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); - if (likely(rqst_should_sleep(rqstp))) + if (likely(rqst_should_sleep(rqstp))) { + svc_pool_thread_mark_idle(pool, rqstp); time_left = schedule_timeout(timeout); - else + } else __set_current_state(TASK_RUNNING); try_to_freeze(); - set_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); + svc_pool_thread_mark_busy(pool, rqstp); + rqstp->rq_xprt = svc_xprt_dequeue(pool); if (rqstp->rq_xprt) { trace_svc_pool_awoken(rqstp);