From patchwork Mon Nov 27 16:33:24 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13469939 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C620837156; Mon, 27 Nov 2023 16:33:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="NRnllNng" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 011F7C433C7; Mon, 27 Nov 2023 16:33:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701102805; bh=auOVN/6eo+7yifu9BNxyrMOcRTbDXMD0quxKm4nvEfE=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=NRnllNngkBcVu9eqadZ4EstGo2aNMmPSqOuti2jCUaO9STxNgHvDxjr4M5Z2Vr+Xu tu7OMu0+RyUZroisVd2eBJB8wbLIv6+sjRm1rusaRCHpWRNP7BO3E1UE2nnNykb3ZD C/TnnbUF6BNh2hlvOR2TfctNUkUyVBreX5v4kdsFGq5sqro5fivf9Mhj4Y3clul9nf 3gyzv6+aNg1IQ5s1TIpLVrXY72T0a4MmhnL6d4qbh6yk0JwsvJsGcfk2cod5KPjAYv xkfZSP2dJXTywVZbmb/waHeb+vf5Uvw0KAgqnnLP5DwFRgSZttlTOlc6mNHjxZcdae X8z4Cx8eCPI9A== Subject: [PATCH v1 1/5] svcrdma: Add lockdep class keys for transport locks From: Chuck Lever To: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org Cc: Chuck Lever , tom@talpey.com Date: Mon, 27 Nov 2023 11:33:24 -0500 Message-ID: <170110280401.49524.10461585339296704465.stgit@bazille.1015granger.net> In-Reply-To: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> References: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> User-Agent: StGit/1.5 Precedence: bulk X-Mailing-List: linux-rdma@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever Two svcrdma-related transport locks can become quite contended. Collate their use and make them easy to find in /proc/lock_stat for better observability. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index c046916df007..3826da1c15f3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -125,6 +125,9 @@ static void qp_event_handler(struct ib_event *event, void *context) static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, struct net *net, int node) { + static struct lock_class_key svcrdma_rwctx_lock; + static struct lock_class_key svcrdma_sctx_lock; + static struct lock_class_key svcrdma_dto_lock; struct svcxprt_rdma *cma_xprt; cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node); @@ -141,8 +144,11 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); + lockdep_set_class(&cma_xprt->sc_rq_dto_lock, &svcrdma_dto_lock); spin_lock_init(&cma_xprt->sc_send_lock); + lockdep_set_class(&cma_xprt->sc_send_lock, &svcrdma_sctx_lock); spin_lock_init(&cma_xprt->sc_rw_ctxt_lock); + lockdep_set_class(&cma_xprt->sc_rw_ctxt_lock, &svcrdma_rwctx_lock); /* * Note that this implies that the underlying transport support From patchwork Mon Nov 27 16:33:30 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13469940 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 08926381B8; Mon, 27 Nov 2023 16:33:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="k4bgn5xJ" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 82869C433C9; Mon, 27 Nov 2023 16:33:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701102811; bh=juU6fnl4PfxG24GSF7q6GNBCMl+rBTJajn8+8aZpaCo=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=k4bgn5xJv6TRuHOTEstdMEkySE+eI2Paa6k34C0410c3NKHN9QAcE53dMZ5UrV/SY s0Nl+iFoHgoiF2+2x6uy3O6ue2ovJQ0u0RsBPZE5TZt7OzZGhqjlgr9VB26vyViNTn wsZ2hJoEtM07YP2uY44sXU0ixi/BZiti3ePCtly45cZPFi/c5m/mG3b2cLL5NdgxbT Jon4UM29FdqQlcj5JyaRo90DhCe0x7yadyf0p1RTb3zjM8Q9ozQsNv5NKPnUqHkCpO G+dQn3RQAFvhWAhn5JIBr+O/IZ7pxd4uEsZx/rsVO0/vBIA9B87ilSm2Nh3CyBeu8g I+/wMpKsOgDGw== Subject: [PATCH v1 2/5] rpcrdma: Introduce a simple cid tracepoint class From: Chuck Lever To: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org Cc: Chuck Lever , tom@talpey.com Date: Mon, 27 Nov 2023 11:33:30 -0500 Message-ID: <170110281051.49524.8827985545385062428.stgit@bazille.1015granger.net> In-Reply-To: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> References: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> User-Agent: StGit/1.5 Precedence: bulk X-Mailing-List: linux-rdma@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever De-duplicate some code, making it easier to add new tracepoints that report only a completion ID. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 93 +++++++++---------------------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 - net/sunrpc/xprtrdma/svc_rdma_rw.c | 2 - net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 - net/sunrpc/xprtrdma/verbs.c | 2 - 5 files changed, 30 insertions(+), 71 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 718df1d9b834..b3445e07c151 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -22,47 +22,37 @@ ** Event classes **/ -DECLARE_EVENT_CLASS(rpcrdma_completion_class, +DECLARE_EVENT_CLASS(rpcrdma_simple_cid_class, TP_PROTO( - const struct ib_wc *wc, const struct rpc_rdma_cid *cid ), - TP_ARGS(wc, cid), + TP_ARGS(cid), TP_STRUCT__entry( __field(u32, cq_id) __field(int, completion_id) - __field(unsigned long, status) - __field(unsigned int, vendor_err) ), TP_fast_assign( __entry->cq_id = cid->ci_queue_id; __entry->completion_id = cid->ci_completion_id; - __entry->status = wc->status; - if (wc->status) - __entry->vendor_err = wc->vendor_err; - else - __entry->vendor_err = 0; ), - TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x)", - __entry->cq_id, __entry->completion_id, - rdma_show_wc_status(__entry->status), - __entry->status, __entry->vendor_err + TP_printk("cq.id=%d cid=%d", + __entry->cq_id, __entry->completion_id ) ); -#define DEFINE_COMPLETION_EVENT(name) \ - DEFINE_EVENT(rpcrdma_completion_class, name, \ +#define DEFINE_SIMPLE_CID_EVENT(name) \ + DEFINE_EVENT(rpcrdma_simple_cid_class, name, \ TP_PROTO( \ - const struct ib_wc *wc, \ const struct rpc_rdma_cid *cid \ ), \ - TP_ARGS(wc, cid)) + TP_ARGS(cid) \ + ) -DECLARE_EVENT_CLASS(rpcrdma_send_completion_class, +DECLARE_EVENT_CLASS(rpcrdma_completion_class, TP_PROTO( const struct ib_wc *wc, const struct rpc_rdma_cid *cid @@ -73,20 +63,29 @@ DECLARE_EVENT_CLASS(rpcrdma_send_completion_class, TP_STRUCT__entry( __field(u32, cq_id) __field(int, completion_id) + __field(unsigned long, status) + __field(unsigned int, vendor_err) ), TP_fast_assign( __entry->cq_id = cid->ci_queue_id; __entry->completion_id = cid->ci_completion_id; + __entry->status = wc->status; + if (wc->status) + __entry->vendor_err = wc->vendor_err; + else + __entry->vendor_err = 0; ), - TP_printk("cq.id=%u cid=%d", - __entry->cq_id, __entry->completion_id + TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x)", + __entry->cq_id, __entry->completion_id, + rdma_show_wc_status(__entry->status), + __entry->status, __entry->vendor_err ) ); -#define DEFINE_SEND_COMPLETION_EVENT(name) \ - DEFINE_EVENT(rpcrdma_send_completion_class, name, \ +#define DEFINE_COMPLETION_EVENT(name) \ + DEFINE_EVENT(rpcrdma_completion_class, name, \ TP_PROTO( \ const struct ib_wc *wc, \ const struct rpc_rdma_cid *cid \ @@ -978,27 +977,7 @@ TRACE_EVENT(xprtrdma_post_send_err, ) ); -TRACE_EVENT(xprtrdma_post_recv, - TP_PROTO( - const struct rpcrdma_rep *rep - ), - - TP_ARGS(rep), - - TP_STRUCT__entry( - __field(u32, cq_id) - __field(int, completion_id) - ), - - TP_fast_assign( - __entry->cq_id = rep->rr_cid.ci_queue_id; - __entry->completion_id = rep->rr_cid.ci_completion_id; - ), - - TP_printk("cq.id=%d cid=%d", - __entry->cq_id, __entry->completion_id - ) -); +DEFINE_SIMPLE_CID_EVENT(xprtrdma_post_recv); TRACE_EVENT(xprtrdma_post_recvs, TP_PROTO( @@ -2020,31 +1999,11 @@ TRACE_EVENT(svcrdma_post_send, ) ); -DEFINE_SEND_COMPLETION_EVENT(svcrdma_wc_send); +DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_send); DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_send_flush); DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_send_err); -TRACE_EVENT(svcrdma_post_recv, - TP_PROTO( - const struct svc_rdma_recv_ctxt *ctxt - ), - - TP_ARGS(ctxt), - - TP_STRUCT__entry( - __field(u32, cq_id) - __field(int, completion_id) - ), - - TP_fast_assign( - __entry->cq_id = ctxt->rc_cid.ci_queue_id; - __entry->completion_id = ctxt->rc_cid.ci_completion_id; - ), - - TP_printk("cq.id=%d cid=%d", - __entry->cq_id, __entry->completion_id - ) -); +DEFINE_SIMPLE_CID_EVENT(svcrdma_post_recv); DEFINE_RECEIVE_SUCCESS_EVENT(svcrdma_wc_recv); DEFINE_RECEIVE_FLUSH_EVENT(svcrdma_wc_recv_flush); @@ -2153,7 +2112,7 @@ TRACE_EVENT(svcrdma_wc_read, DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_read_flush); DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_read_err); -DEFINE_SEND_COMPLETION_EVENT(svcrdma_wc_write); +DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_write); DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_flush); DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_err); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index c8c1c534070b..72374033bb2b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -264,7 +264,7 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma, if (!ctxt) break; - trace_svcrdma_post_recv(ctxt); + trace_svcrdma_post_recv(&ctxt->rc_cid); ctxt->rc_recv_wr.next = recv_chain; recv_chain = &ctxt->rc_recv_wr; rdma->sc_pending_recvs++; diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index de1ec3220aab..db2a4bd2f7ad 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -282,7 +282,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) switch (wc->status) { case IB_WC_SUCCESS: - trace_svcrdma_wc_write(wc, &cc->cc_cid); + trace_svcrdma_wc_write(&cc->cc_cid); break; case IB_WC_WR_FLUSH_ERR: trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 09f5d0570bc9..31b711deab5e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -305,7 +305,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) goto flushed; - trace_svcrdma_wc_send(wc, &ctxt->sc_cid); + trace_svcrdma_wc_send(&ctxt->sc_cid); svc_rdma_send_ctxt_put(rdma, ctxt); return; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 28c0771c4e8c..4f8d7efa469f 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1364,7 +1364,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) } rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id; - trace_xprtrdma_post_recv(rep); + trace_xprtrdma_post_recv(&rep->rr_cid); rep->rr_recv_wr.next = wr; wr = &rep->rr_recv_wr; --needed; From patchwork Mon Nov 27 16:33:37 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13469941 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E62F036B0A; Mon, 27 Nov 2023 16:33:38 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="IrNQby2T" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 144E0C433CA; Mon, 27 Nov 2023 16:33:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701102818; bh=5YOVMxDLVsAZTaK+Wm4nPQTpbOkeewHjzw0Q0J8RWy8=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=IrNQby2TFsWjVTVNb9g19mauh+jZuEpaU+TzP0vgaP3GzQquvBrUI0Z/91+HCXpQs RwWe0ujfLaDSKelrC93PiQSAcoxYUuztewM3yCNqR3lfEOc2kktpkbPLApuDiJjdN5 RX9IaKh8SaGloOhqmUculPQkiSdVmgXOFKgEkdk1RZdzoDeCzhNYQt+jmaDFPu+cB1 OAIZtzYrSwGREuP46js8602/GHWay7TZ9BYEAAvSSR1wlqtmKMZfEDBcg8x8SLoLJs 7V/xsa4QOMJu2Sh4dZfUNQyCy7pM5Jv2m4qEfIO9/HBWjpEt09HK48osmXwb3zL+D0 RsVixEogXo5vA== Subject: [PATCH v1 3/5] svcrdma: SQ error tracepoints should report completion IDs From: Chuck Lever To: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org Cc: Chuck Lever , tom@talpey.com Date: Mon, 27 Nov 2023 11:33:37 -0500 Message-ID: <170110281706.49524.12668997627474561777.stgit@bazille.1015granger.net> In-Reply-To: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> References: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> User-Agent: StGit/1.5 Precedence: bulk X-Mailing-List: linux-rdma@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever Update the Send Queue's error flow tracepoints to report the completion ID of the waiting or failing context. This ties the wait/failure to a particular operation or request, which is a little more useful than knowing only the transport that is about to close. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 49 ++++++++++++++++++++------------- net/sunrpc/xprtrdma/svc_rdma_rw.c | 6 ++-- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 6 ++-- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index b3445e07c151..f1c2022d39ca 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2143,65 +2143,74 @@ TRACE_EVENT(svcrdma_qp_error, ) ); -DECLARE_EVENT_CLASS(svcrdma_sendqueue_event, +DECLARE_EVENT_CLASS(svcrdma_sendqueue_class, TP_PROTO( - const struct svcxprt_rdma *rdma + const struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid ), - TP_ARGS(rdma), + TP_ARGS(rdma, cid), TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) __field(int, avail) __field(int, depth) - __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; __entry->avail = atomic_read(&rdma->sc_sq_avail); __entry->depth = rdma->sc_sq_depth; - __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s sc_sq_avail=%d/%d", - __get_str(addr), __entry->avail, __entry->depth + TP_printk("cq.id=%u cid=%d sc_sq_avail=%d/%d", + __entry->cq_id, __entry->completion_id, + __entry->avail, __entry->depth ) ); #define DEFINE_SQ_EVENT(name) \ - DEFINE_EVENT(svcrdma_sendqueue_event, svcrdma_sq_##name,\ - TP_PROTO( \ - const struct svcxprt_rdma *rdma \ - ), \ - TP_ARGS(rdma)) + DEFINE_EVENT(svcrdma_sendqueue_class, name, \ + TP_PROTO( \ + const struct svcxprt_rdma *rdma, \ + const struct rpc_rdma_cid *cid \ + ), \ + TP_ARGS(rdma, cid) \ + ) -DEFINE_SQ_EVENT(full); -DEFINE_SQ_EVENT(retry); +DEFINE_SQ_EVENT(svcrdma_sq_full); +DEFINE_SQ_EVENT(svcrdma_sq_retry); TRACE_EVENT(svcrdma_sq_post_err, TP_PROTO( const struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid, int status ), - TP_ARGS(rdma, status), + TP_ARGS(rdma, cid, status), TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) __field(int, avail) __field(int, depth) __field(int, status) - __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; __entry->avail = atomic_read(&rdma->sc_sq_avail); __entry->depth = rdma->sc_sq_depth; __entry->status = status; - __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s sc_sq_avail=%d/%d status=%d", - __get_str(addr), __entry->avail, __entry->depth, - __entry->status + TP_printk("cq.id=%u cid=%d sc_sq_avail=%d/%d status=%d", + __entry->cq_id, __entry->completion_id, + __entry->avail, __entry->depth, __entry->status ) ); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index db2a4bd2f7ad..b06e49cc55fb 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -406,14 +406,14 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc) } percpu_counter_inc(&svcrdma_stat_sq_starve); - trace_svcrdma_sq_full(rdma); + trace_svcrdma_sq_full(rdma, &cc->cc_cid); atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); wait_event(rdma->sc_send_wait, atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount); - trace_svcrdma_sq_retry(rdma); + trace_svcrdma_sq_retry(rdma, &cc->cc_cid); } while (1); - trace_svcrdma_sq_post_err(rdma, ret); + trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret); svc_xprt_deferred_close(&rdma->sc_xprt); /* If even one was posted, there will be a completion. */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 31b711deab5e..2ee691c45b85 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -343,13 +343,13 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) while (1) { if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) { percpu_counter_inc(&svcrdma_stat_sq_starve); - trace_svcrdma_sq_full(rdma); + trace_svcrdma_sq_full(rdma, &ctxt->sc_cid); atomic_inc(&rdma->sc_sq_avail); wait_event(rdma->sc_send_wait, atomic_read(&rdma->sc_sq_avail) > 1); if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) return -ENOTCONN; - trace_svcrdma_sq_retry(rdma); + trace_svcrdma_sq_retry(rdma, &ctxt->sc_cid); continue; } @@ -360,7 +360,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt) return 0; } - trace_svcrdma_sq_post_err(rdma, ret); + trace_svcrdma_sq_post_err(rdma, &ctxt->sc_cid, ret); svc_xprt_deferred_close(&rdma->sc_xprt); wake_up(&rdma->sc_send_wait); return ret; From patchwork Mon Nov 27 16:33:43 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13469942 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2D78536B18; Mon, 27 Nov 2023 16:33:45 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Zhw3Rk0N" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A2E8FC433CB; Mon, 27 Nov 2023 16:33:44 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701102825; bh=NMAGBd18mICFtdZSaprt2tz+g9mcjLQh/meV0koSYYo=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=Zhw3Rk0NesaMmX6rVXZJmqemyNQKC4GgvGPw59ikolXGXiLdKb3E5J+Ajk/AkqPh9 jueXVLkdDo0dw/dpw/x/OeHeeBKmVXC/9dpEe8kK6h2YY1A0+KHJbqVwtXq2Mgc80i 8qWbGxQrMjWuLlqwIQk8TM6E2+6q7UUm4JdUmRdZZPOBOiTLkGj/k/Cr8ldOPiUHJW W6L7qhz4z28CNJec8vHTCwQcbE2NoaJDtXWHeWQSzbOxvG0kLcKanoe7MrkJfq6Xt5 fk9SUldRn5LHl/IgDLH2h278KeP8oNTgEGDvJPcomYW3PRINGlUdfazV8Ut/icQM/X iEsKLnbXKrU2Q== Subject: [PATCH v1 4/5] svcrdma: DMA error tracepoints should report completion IDs From: Chuck Lever To: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org Cc: Chuck Lever , tom@talpey.com Date: Mon, 27 Nov 2023 11:33:43 -0500 Message-ID: <170110282361.49524.15995241363735316406.stgit@bazille.1015granger.net> In-Reply-To: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> References: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> User-Agent: StGit/1.5 Precedence: bulk X-Mailing-List: linux-rdma@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever Update the DMA error flow tracepoints to report the completion ID of the failing context. This ties the wait/failure to a particular operation or request, which is more useful than knowing only the failing transport. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 74 +++++++++++++++++++------------------ net/sunrpc/xprtrdma/svc_rdma_rw.c | 9 +++-- 2 files changed, 42 insertions(+), 41 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index f1c2022d39ca..bba758e5fb1d 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1805,33 +1805,37 @@ DEFINE_SVC_DMA_EVENT(dma_unmap_page); TRACE_EVENT(svcrdma_dma_map_rw_err, TP_PROTO( const struct svcxprt_rdma *rdma, + u64 offset, + u32 handle, unsigned int nents, int status ), - TP_ARGS(rdma, nents, status), + TP_ARGS(rdma, offset, handle, nents, status), TP_STRUCT__entry( - __field(int, status) + __field(u32, cq_id) + __field(u32, handle) + __field(u64, offset) __field(unsigned int, nents) - __string(device, rdma->sc_cm_id->device->name) - __string(addr, rdma->sc_xprt.xpt_remotebuf) + __field(int, status) ), TP_fast_assign( - __entry->status = status; + __entry->cq_id = rdma->sc_sq_cq->res.id; + __entry->handle = handle; + __entry->offset = offset; __entry->nents = nents; - __assign_str(device, rdma->sc_cm_id->device->name); - __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + __entry->status = status; ), - TP_printk("addr=%s device=%s nents=%u status=%d", - __get_str(addr), __get_str(device), __entry->nents, - __entry->status + TP_printk("cq.id=%u 0x%016llx:0x%08x nents=%u status=%d", + __entry->cq_id, (unsigned long long)__entry->offset, + __entry->handle, __entry->nents, __entry->status ) ); -TRACE_EVENT(svcrdma_no_rwctx_err, +TRACE_EVENT(svcrdma_rwctx_empty, TP_PROTO( const struct svcxprt_rdma *rdma, unsigned int num_sges @@ -1840,79 +1844,75 @@ TRACE_EVENT(svcrdma_no_rwctx_err, TP_ARGS(rdma, num_sges), TP_STRUCT__entry( + __field(u32, cq_id) __field(unsigned int, num_sges) - __string(device, rdma->sc_cm_id->device->name) - __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( + __entry->cq_id = rdma->sc_sq_cq->res.id; __entry->num_sges = num_sges; - __assign_str(device, rdma->sc_cm_id->device->name); - __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s device=%s num_sges=%d", - __get_str(addr), __get_str(device), __entry->num_sges + TP_printk("cq.id=%u num_sges=%d", + __entry->cq_id, __entry->num_sges ) ); TRACE_EVENT(svcrdma_page_overrun_err, TP_PROTO( - const struct svcxprt_rdma *rdma, - const struct svc_rqst *rqst, + const struct rpc_rdma_cid *cid, unsigned int pageno ), - TP_ARGS(rdma, rqst, pageno), + TP_ARGS(cid, pageno), TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) __field(unsigned int, pageno) - __field(u32, xid) - __string(device, rdma->sc_cm_id->device->name) - __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; __entry->pageno = pageno; - __entry->xid = __be32_to_cpu(rqst->rq_xid); - __assign_str(device, rdma->sc_cm_id->device->name); - __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s device=%s xid=0x%08x pageno=%u", __get_str(addr), - __get_str(device), __entry->xid, __entry->pageno + TP_printk("cq.id=%u cid=%d pageno=%u", + __entry->cq_id, __entry->completion_id, + __entry->pageno ) ); TRACE_EVENT(svcrdma_small_wrch_err, TP_PROTO( - const struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid, unsigned int remaining, unsigned int seg_no, unsigned int num_segs ), - TP_ARGS(rdma, remaining, seg_no, num_segs), + TP_ARGS(cid, remaining, seg_no, num_segs), TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) __field(unsigned int, remaining) __field(unsigned int, seg_no) __field(unsigned int, num_segs) - __string(device, rdma->sc_cm_id->device->name) - __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; __entry->remaining = remaining; __entry->seg_no = seg_no; __entry->num_segs = num_segs; - __assign_str(device, rdma->sc_cm_id->device->name); - __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s device=%s remaining=%u seg_no=%u num_segs=%u", - __get_str(addr), __get_str(device), __entry->remaining, - __entry->seg_no, __entry->num_segs + TP_printk("cq.id=%u cid=%d remaining=%u seg_no=%u num_segs=%u", + __entry->cq_id, __entry->completion_id, + __entry->remaining, __entry->seg_no, __entry->num_segs ) ); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index b06e49cc55fb..c06676714417 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -80,7 +80,7 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) out_free: kfree(ctxt); out_noctx: - trace_svcrdma_no_rwctx_err(rdma, sges); + trace_svcrdma_rwctx_empty(rdma, sges); return NULL; } @@ -135,8 +135,9 @@ static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma, ctxt->rw_sg_table.sgl, ctxt->rw_nents, 0, offset, handle, direction); if (unlikely(ret < 0)) { + trace_svcrdma_dma_map_rw_err(rdma, offset, handle, + ctxt->rw_nents, ret); svc_rdma_put_rw_ctxt(rdma, ctxt); - trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret); } return ret; } @@ -526,7 +527,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, return 0; out_overflow: - trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no, + trace_svcrdma_small_wrch_err(&cc->cc_cid, remaining, info->wi_seg_no, info->wi_chunk->ch_segcount); return -E2BIG; } @@ -766,7 +767,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, return 0; out_overrun: - trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno); + trace_svcrdma_page_overrun_err(&cc->cc_cid, info->ri_pageno); return -EINVAL; } From patchwork Mon Nov 27 16:33:50 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chuck Lever X-Patchwork-Id: 13469943 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9E632374D9; Mon, 27 Nov 2023 16:33:51 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="JafuWhO9" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2327BC433C8; Mon, 27 Nov 2023 16:33:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701102831; bh=0xwrsVsqBTI3G+nXyOC3xn+wjwZZsJXyDBKhcdM8ygk=; h=Subject:From:To:Cc:Date:In-Reply-To:References:From; b=JafuWhO9elbsU5TZ7bD8awq/xNudOE7+OWZctpq4k+utlLGpJ5g5jsJ+5tqiV11yU XSd8IdCRvC8MLODOaclC+zCxLlqDmprKcPlLfzL18JBCBHSTc33dAnVlPOCjtpKG4Y mJhPZrZEHIvfMwfgmrZ9wliO0dQ2QqwM9X9ND596S7lfUagtSR3zX32WgM4uG1EQH5 VGk7HOrUt41D1Xzxh9qVuRKtFBpyiRXWqsvkwYDUe9rcap9/6c2C/DshNx+37bZ6Z+ ugl/UWtT02TI9RQYCOjG8AwBDOeNSHzLBFG6HzG97yOBAbK6eQCx3GCWaNS3nAM3bR nR70nrTHtpSuw== Subject: [PATCH v1 5/5] svcrdma: Update some svcrdma DMA-related tracepoints From: Chuck Lever To: linux-nfs@vger.kernel.org, linux-rdma@vger.kernel.org Cc: Chuck Lever , tom@talpey.com Date: Mon, 27 Nov 2023 11:33:50 -0500 Message-ID: <170110283021.49524.4951369507516981349.stgit@bazille.1015granger.net> In-Reply-To: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> References: <170110267835.49524.14512830016966273991.stgit@bazille.1015granger.net> User-Agent: StGit/1.5 Precedence: bulk X-Mailing-List: linux-rdma@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Chuck Lever A send/recv_ctxt already records transport-related information in the cq.id, thus there is no need to record the IP addresses of the transport endpoints. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 21 +++++++++++---------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 10 +++++----- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index bba758e5fb1d..9a3fc6eb09a8 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1762,29 +1762,29 @@ DEFINE_ERROR_EVENT(chunk); DECLARE_EVENT_CLASS(svcrdma_dma_map_class, TP_PROTO( - const struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid, u64 dma_addr, u32 length ), - TP_ARGS(rdma, dma_addr, length), + TP_ARGS(cid, dma_addr, length), TP_STRUCT__entry( + __field(u32, cq_id) + __field(int, completion_id) __field(u64, dma_addr) __field(u32, length) - __string(device, rdma->sc_cm_id->device->name) - __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( + __entry->cq_id = cid->ci_queue_id; + __entry->completion_id = cid->ci_completion_id; __entry->dma_addr = dma_addr; __entry->length = length; - __assign_str(device, rdma->sc_cm_id->device->name); - __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s device=%s dma_addr=%llu length=%u", - __get_str(addr), __get_str(device), + TP_printk("cq.id=%u cid=%d dma_addr=%llu length=%u", + __entry->cq_id, __entry->completion_id, __entry->dma_addr, __entry->length ) ); @@ -1792,11 +1792,12 @@ DECLARE_EVENT_CLASS(svcrdma_dma_map_class, #define DEFINE_SVC_DMA_EVENT(name) \ DEFINE_EVENT(svcrdma_dma_map_class, svcrdma_##name, \ TP_PROTO( \ - const struct svcxprt_rdma *rdma,\ + const struct rpc_rdma_cid *cid, \ u64 dma_addr, \ u32 length \ ), \ - TP_ARGS(rdma, dma_addr, length)) + TP_ARGS(cid, dma_addr, length) \ + ) DEFINE_SVC_DMA_EVENT(dma_map_page); DEFINE_SVC_DMA_EVENT(dma_map_err); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 2ee691c45b85..9571ed4a74d4 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -237,13 +237,13 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma, * remains mapped until @ctxt is destroyed. */ for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) { + trace_svcrdma_dma_unmap_page(&ctxt->sc_cid, + ctxt->sc_sges[i].addr, + ctxt->sc_sges[i].length); ib_dma_unmap_page(device, ctxt->sc_sges[i].addr, ctxt->sc_sges[i].length, DMA_TO_DEVICE); - trace_svcrdma_dma_unmap_page(rdma, - ctxt->sc_sges[i].addr, - ctxt->sc_sges[i].length); } llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts); @@ -550,14 +550,14 @@ static int svc_rdma_page_dma_map(void *data, struct page *page, if (ib_dma_mapping_error(dev, dma_addr)) goto out_maperr; - trace_svcrdma_dma_map_page(rdma, dma_addr, len); + trace_svcrdma_dma_map_page(&ctxt->sc_cid, dma_addr, len); ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr; ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len; ctxt->sc_send_wr.num_sge++; return 0; out_maperr: - trace_svcrdma_dma_map_err(rdma, dma_addr, len); + trace_svcrdma_dma_map_err(&ctxt->sc_cid, dma_addr, len); return -EIO; }