
[for-next,V4,1/6] IB/MAD: Add send path trace points

Message ID 20190211151503.16434-2-ira.weiny@intel.com (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Series Add MAD stack trace points

Commit Message

Ira Weiny Feb. 11, 2019, 3:14 p.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

Use the standard Linux trace mechanism to trace MADs being sent.  Four trace
points are added: when the MAD is posted to the QP, when the MAD send
completes, when a MAD is resent, and when the MAD send completes in error.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---

Changes since v3:
	Change dev_name to dev_index

Changes since v2:
	Change license text to SPDX tag
	Change dev_name to string from array
	Reorder fields for more compact ring buffer utilization
	Use a defined roce address type for safer memcpy/memset

Changes since v1:
	Update MAINTAINERS with tracing file

 MAINTAINERS                   |   1 +
 drivers/infiniband/core/mad.c |  73 ++++++++++++++-
 include/trace/events/ib_mad.h | 164 ++++++++++++++++++++++++++++++++++
 3 files changed, 237 insertions(+), 1 deletion(-)
 create mode 100644 include/trace/events/ib_mad.h

Comments

Michael J. Ruhl Feb. 11, 2019, 6:02 p.m. UTC | #1
>-----Original Message-----
>From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
>owner@vger.kernel.org] On Behalf Of ira.weiny@intel.com
>Sent: Monday, February 11, 2019 10:15 AM
>To: linux-rdma@vger.kernel.org
>Cc: Hal Rosenstock <hal@dev.mellanox.co.il>; Doug Ledford
><dledford@redhat.com>; Jason Gunthorpe <jgg@ziepe.ca>; Steven Rostedt
><rostedt@goodmis.org>; Ingo Molnar <mingo@redhat.com>; Alexei
>Starovoitov <ast@kernel.org>; Daniel Borkmann <daniel@iogearbox.net>;
>Leon Romanovsky <leon@kernel.org>; Weiny, Ira <ira.weiny@intel.com>
>Subject: [PATCH for-next V4 1/6] IB/MAD: Add send path trace points
>
>From: Ira Weiny <ira.weiny@intel.com>
>
>Use the standard Linux trace mechanism to trace MADs being sent.  4 trace
>points are added, when the MAD is posted to the qp, when the MAD is
>completed, if a MAD is resent, and when the MAD completes in error.
>
>Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>
>---
>
>Changes since v3:
>	Change dev_name to dev_index
>
>Changes since v2:
>	Change license text to SPDX tag
>	Change dev_name to string from array
>	Reorder fields for more compact ring buffer utilization
>	Use a defined roce address type for safer memcpy/memset
>
>Changes since v1:
>	Update MAINTAINERS with tracing file
>
> MAINTAINERS                   |   1 +
> drivers/infiniband/core/mad.c |  73 ++++++++++++++-
> include/trace/events/ib_mad.h | 164 ++++++++++++++++++++++++++++++++++
> 3 files changed, 237 insertions(+), 1 deletion(-)
> create mode 100644 include/trace/events/ib_mad.h
>
>diff --git a/MAINTAINERS b/MAINTAINERS
>index 8c68de3cfd80..9254bd40f1ae 100644
>--- a/MAINTAINERS
>+++ b/MAINTAINERS
>@@ -7491,6 +7491,7 @@ F:	drivers/infiniband/
> F:	include/uapi/linux/if_infiniband.h
> F:	include/uapi/rdma/
> F:	include/rdma/
>+F:	include/trace/events/ib_mad.h
>
> INGENIC JZ4780 DMA Driver
> M:	Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
>diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
>index 7870823bac47..e166d584b80c 100644
>--- a/drivers/infiniband/core/mad.c
>+++ b/drivers/infiniband/core/mad.c
>@@ -3,7 +3,7 @@
>  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
>  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
>  * Copyright (c) 2009 HNR Consulting. All rights reserved.
>- * Copyright (c) 2014 Intel Corporation.  All rights reserved.
>+ * Copyright (c) 2014,2018 Intel Corporation.  All rights reserved.
>  *
>  * This software is available to you under a choice of one of two
>  * licenses.  You may choose to be licensed under the terms of the GNU
>@@ -51,6 +51,51 @@
> #include "opa_smi.h"
> #include "agent.h"
>
>+struct rdma_mad_trace_addr {
>+	u32 dlid;
>+	struct roce_ah_attr roce_addr;
>+	u8 sl;
>+	u16 pkey;
>+	u32 rqpn;
>+	u32 rqkey;
>+};
>+static void trace_create_mad_addr(struct ib_device *dev, u8 pnum,
>+				  struct ib_ud_wr *wr,
>+				  struct rdma_mad_trace_addr *addr)
>+{
>+	struct rdma_ah_attr attr;
>+
>+	memset(&attr, 0, sizeof(attr));
>+	rdma_query_ah(wr->ah, &attr);
>+
>+	/* These are common */
>+	addr->sl = attr.sl;
>+	ib_query_pkey(dev, pnum, wr->pkey_index, &addr->pkey);
>+	addr->rqpn = wr->remote_qpn;
>+	addr->rqkey = wr->remote_qkey;
>+
>+	switch (attr.type) {
>+	case RDMA_AH_ATTR_TYPE_IB:
>+		addr->dlid = attr.ib.dlid;
>+		memset(&addr->roce_addr, 0, sizeof(addr->roce_addr));
>+		break;
>+	case RDMA_AH_ATTR_TYPE_OPA:
>+		addr->dlid = attr.opa.dlid;
>+		memset(&addr->roce_addr, 0, sizeof(addr->roce_addr));
>+		break;
>+	case RDMA_AH_ATTR_TYPE_ROCE:
>+		addr->dlid = 0;
>+		memcpy(&addr->roce_addr, &attr.roce, sizeof(addr->roce_addr));
>+		break;
>+	case RDMA_AH_ATTR_TYPE_UNDEFINED:
>+		addr->dlid = 0;
>+		memset(&addr->roce_addr, 0, sizeof(addr->roce_addr));
>+		break;
>+	}
>+}
>+#define CREATE_TRACE_POINTS
>+#include <trace/events/ib_mad.h>
>+
> static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
> static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
>
>@@ -1223,6 +1268,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
>
> 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
> 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
>+		if (trace_ib_mad_ib_send_mad_enabled()) {
>+			struct rdma_mad_trace_addr addr;
>+
>+			trace_create_mad_addr(qp_info->port_priv->device,
>+					      qp_info->port_priv->port_num,
>+					      &mad_send_wr->send_wr, &addr);
>+			trace_ib_mad_ib_send_mad(mad_send_wr, &addr);
>+		}
> 		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
> 				   NULL);
> 		list = &qp_info->send_queue.list;

Ira,

Are the trace_ib_mad_ib_send_mad_enabled() checks (and the ones at the other
locations) actually necessary?

When you populate the TP_STRUCT__entry values, you can call a function to
get information (e.g. see the hfi1 trace_ibhdrs.h usage of hfi1_trace_packet_hdr_len()).

You may be able to call trace_create_mad_addr() there (you will probably need
to pass qp_info into trace_ib_mad_ib_send_mad()) rather than having to have
this enable check in the code.

Mike
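
A rough sketch of that suggestion (illustrative only, not the posted patch:
the field list is trimmed to the address fields, and it assumes qp_info and
the existing trace_create_mad_addr() helper can simply be used from inside
the event definition):

TRACE_EVENT(ib_mad_ib_send_mad,
	TP_PROTO(struct ib_mad_send_wr_private *wr,
		 struct ib_mad_qp_info *qp_info),
	TP_ARGS(wr, qp_info),

	TP_STRUCT__entry(
		__field(u32, dlid)
		__field(u8,  sl)
		__field(u16, pkey)
		__field(u32, rqpn)
		__field(u32, rqkey)
	),

	TP_fast_assign(
		/* this block only runs when the event is enabled */
		struct rdma_mad_trace_addr addr;

		trace_create_mad_addr(qp_info->port_priv->device,
				      qp_info->port_priv->port_num,
				      &wr->send_wr, &addr);
		__entry->dlid  = addr.dlid;
		__entry->sl    = addr.sl;
		__entry->pkey  = addr.pkey;
		__entry->rqpn  = addr.rqpn;
		__entry->rqkey = addr.rqkey;
	),

	TP_printk("dlid 0x%08x sl %d rqpn 0x%x rqkey 0x%x pkey 0x%x",
		  be32_to_cpu(__entry->dlid), __entry->sl,
		  __entry->rqpn, __entry->rqkey, __entry->pkey)
);

Since mad.c defines trace_create_mad_addr() just before the
CREATE_TRACE_POINTS include, the generated probe body should be able to call
it directly, and the explicit trace_*_enabled() checks at the call sites
could then be dropped.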


>@@ -2496,6 +2549,8 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
> 	send_queue = mad_list->mad_queue;
> 	qp_info = send_queue->qp_info;
>
>+	trace_ib_mad_send_done_handler(mad_send_wr, wc);
>+
> retry:
> 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
> 			    mad_send_wr->header_mapping,
>@@ -2527,6 +2582,14 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
> 	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
>
> 	if (queued_send_wr) {
>+		if (trace_ib_mad_send_done_resend_enabled()) {
>+			struct rdma_mad_trace_addr addr;
>+
>+			trace_create_mad_addr(qp_info->port_priv->device,
>+					      qp_info->port_priv->port_num,
>+					      &mad_send_wr->send_wr, &addr);
>+			trace_ib_mad_send_done_resend(queued_send_wr, &addr);
>+		}
> 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
> 				   NULL);
> 		if (ret) {
>@@ -2574,6 +2637,14 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
> 		if (mad_send_wr->retry) {
> 			/* Repost send */
> 			mad_send_wr->retry = 0;
>+			if (trace_ib_mad_error_handler_enabled()) {
>+				struct rdma_mad_trace_addr addr;
>+
>+				trace_create_mad_addr(qp_info->port_priv->device,
>+						      qp_info->port_priv->port_num,
>+						      &mad_send_wr->send_wr, &addr);
>+				trace_ib_mad_error_handler(mad_send_wr, &addr);
>+			}
> 			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
> 					   NULL);
> 			if (!ret)
>diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h
>new file mode 100644
>index 000000000000..30f72a29cccb
>--- /dev/null
>+++ b/include/trace/events/ib_mad.h
>@@ -0,0 +1,164 @@
>+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
>+
>+/*
>+ * Copyright (c) 2018 Intel Corporation.  All rights reserved.
>+ */
>+
>+#undef TRACE_SYSTEM
>+#define TRACE_SYSTEM ib_mad
>+
>+#if !defined(_TRACE_IB_MAD_H) || defined(TRACE_HEADER_MULTI_READ)
>+#define _TRACE_IB_MAD_H
>+
>+#include <linux/tracepoint.h>
>+#include <rdma/ib_mad.h>
>+
>+DECLARE_EVENT_CLASS(ib_mad_send_template,
>+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct
>rdma_mad_trace_addr *addr),
>+	TP_ARGS(wr, addr),
>+
>+	TP_STRUCT__entry(
>+		__field(u8,             base_version)
>+		__field(u8,             mgmt_class)
>+		__field(u8,             class_version)
>+		__field(u8,             port_num)
>+		__field(u32,            qp_num)
>+		__field(u8,             method)
>+		__field(u8,             sl)
>+		__field(u16,            attr_id)
>+		__field(u32,            attr_mod)
>+		__field(u64,            wrtid)
>+		__field(u64,            tid)
>+		__field(u16,            status)
>+		__field(u16,            class_specific)
>+		__field(u32,            length)
>+		__field(u32,            dlid)
>+		__field(u32,            rqpn)
>+		__field(u32,            rqkey)
>+		__field(u32,            dev_index)
>+		__field(void *,         agent_priv)
>+		__field(unsigned long,  timeout)
>+		__field(int,            retries_left)
>+		__field(int,            max_retries)
>+		__field(int,            retry)
>+		__field(u16,            pkey)
>+	),
>+
>+	TP_fast_assign(
>+		__entry->dev_index = wr->mad_agent_priv->agent.device-
>>index;
>+		__entry->port_num = wr->mad_agent_priv-
>>agent.port_num;
>+		__entry->qp_num = wr->mad_agent_priv->qp_info->qp-
>>qp_num;
>+		__entry->agent_priv = wr->mad_agent_priv;
>+		__entry->wrtid = wr->tid;
>+		__entry->max_retries = wr->max_retries;
>+		__entry->retries_left = wr->retries_left;
>+		__entry->retry = wr->retry;
>+		__entry->timeout = wr->timeout;
>+		__entry->length = wr->send_buf.hdr_len +
>+				  wr->send_buf.data_len;
>+		__entry->base_version = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->base_version;
>+		__entry->mgmt_class = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->mgmt_class;
>+		__entry->class_version = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->class_version;
>+		__entry->method = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->method;
>+		__entry->status = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->status;
>+		__entry->class_specific = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->class_specific;
>+		__entry->tid = ((struct ib_mad_hdr *)wr->send_buf.mad)-
>>tid;
>+		__entry->attr_id = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->attr_id;
>+		__entry->attr_mod = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->attr_mod;
>+		__entry->dlid = addr->dlid;
>+		__entry->sl = addr->sl;
>+		__entry->pkey = addr->pkey;
>+		__entry->rqpn = addr->rqpn;
>+		__entry->rqkey = addr->rqkey;
>+	),
>+
>+	TP_printk("%d:%d QP%d agent %p: " \
>+		  "wrtid 0x%llx; %d/%d retries(%d); timeout %lu length %d :
>hdr : " \
>+		  "base_ver 0x%x class 0x%x class_ver 0x%x method 0x%x " \
>+		  "status 0x%x class_specific 0x%x tid 0x%llx attr_id 0x%x
>attr_mod 0x%x " \
>+		  " => dlid 0x%08x sl %d pkey 0x%x rpqn 0x%x rqpkey 0x%x",
>+		__entry->dev_index, __entry->port_num, __entry-
>>qp_num,
>+		__entry->agent_priv, be64_to_cpu(__entry->wrtid),
>+		__entry->retries_left, __entry->max_retries,
>+		__entry->retry, __entry->timeout, __entry->length,
>+		__entry->base_version, __entry->mgmt_class, __entry-
>>class_version,
>+		__entry->method, be16_to_cpu(__entry->status),
>+		be16_to_cpu(__entry->class_specific),
>+		be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
>+		be32_to_cpu(__entry->attr_mod),
>+		be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey,
>__entry->rqpn,
>+		__entry->rqkey
>+	)
>+);
>+
>+DEFINE_EVENT(ib_mad_send_template, ib_mad_error_handler,
>+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct
>rdma_mad_trace_addr *addr),
>+	TP_ARGS(wr, addr));
>+DEFINE_EVENT(ib_mad_send_template, ib_mad_ib_send_mad,
>+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct
>rdma_mad_trace_addr *addr),
>+	TP_ARGS(wr, addr));
>+DEFINE_EVENT(ib_mad_send_template, ib_mad_send_done_resend,
>+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct
>rdma_mad_trace_addr *addr),
>+	TP_ARGS(wr, addr));
>+
>+TRACE_EVENT(ib_mad_send_done_handler,
>+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_wc *wc),
>+	TP_ARGS(wr, wc),
>+
>+	TP_STRUCT__entry(
>+		__field(u8,             port_num)
>+		__field(u8,             base_version)
>+		__field(u8,             mgmt_class)
>+		__field(u8,             class_version)
>+		__field(u32,            qp_num)
>+		__field(u64,            wrtid)
>+		__field(u16,            status)
>+		__field(u16,            wc_status)
>+		__field(u32,            length)
>+		__field(void *,         agent_priv)
>+		__field(unsigned long,  timeout)
>+		__field(u32,            dev_index)
>+		__field(int,            retries_left)
>+		__field(int,            max_retries)
>+		__field(int,            retry)
>+		__field(u8,             method)
>+	),
>+
>+	TP_fast_assign(
>+		__entry->dev_index = wr->mad_agent_priv->agent.device-
>>index;
>+		__entry->port_num = wr->mad_agent_priv-
>>agent.port_num;
>+		__entry->qp_num = wr->mad_agent_priv->qp_info->qp-
>>qp_num;
>+		__entry->agent_priv = wr->mad_agent_priv;
>+		__entry->wrtid = wr->tid;
>+		__entry->max_retries = wr->max_retries;
>+		__entry->retries_left = wr->retries_left;
>+		__entry->retry = wr->retry;
>+		__entry->timeout = wr->timeout;
>+		__entry->base_version = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->base_version;
>+		__entry->mgmt_class = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->mgmt_class;
>+		__entry->class_version = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->class_version;
>+		__entry->method = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->method;
>+		__entry->status = ((struct ib_mad_hdr *)wr-
>>send_buf.mad)->status;
>+		__entry->wc_status = wc->status;
>+		__entry->length = wc->byte_len;
>+	),
>+
>+	TP_printk("%d:%d QP%d : SEND WC Status %d : agent %p: " \
>+		  "wrtid 0x%llx %d/%d retries(%d) timeout %lu length %d: hdr :
>" \
>+		  "base_ver 0x%x class 0x%x class_ver 0x%x method 0x%x " \
>+		  "status 0x%x",
>+		__entry->dev_index, __entry->port_num, __entry-
>>qp_num,
>+		__entry->wc_status,
>+		__entry->agent_priv, be64_to_cpu(__entry->wrtid),
>+		__entry->retries_left, __entry->max_retries,
>+		__entry->retry, __entry->timeout,
>+		__entry->length,
>+		__entry->base_version, __entry->mgmt_class, __entry-
>>class_version,
>+		__entry->method, be16_to_cpu(__entry->status)
>+	)
>+);
>+
>+
>+#endif /* _TRACE_IB_MAD_H */
>+
>+#include <trace/define_trace.h>
>--
>2.20.1
Ira Weiny Feb. 12, 2019, 7:14 p.m. UTC | #2
On Mon, Feb 11, 2019 at 10:02:02AM -0800, Michael Ruhl wrote:
> 
> Ira,
> 
> Are the trace_ib_mad_ib_send_mad_enabled() checks (and the ones at the other
> locations) actually necessary?
> 
> When you populate the TP_STRUCT__entry values, you can call a function to
> get information (e.g. see the hfi1 trace_ibhdrs.h usage of hfi1_trace_packet_hdr_len()).
> 
> You may be able to call trace_create_mad_addr() there (you will probably need
> to pass qp_info into trace_ib_mad_ib_send_mad()) rather than having to have
> this enable check in the code.

I'll check it out when I get some time.  You are probably right as I'm not an
expert in the trace functionality.

Thanks,
Ira

Ira Weiny Feb. 12, 2019, 11:33 p.m. UTC | #3
On Mon, Feb 11, 2019 at 10:02:02AM -0800, Michael Ruhl wrote:
> 
> Ira,
> 
> Are the trace_ib_mad_ib_send_mad_enabled() checks (and the ones at the other
> locations) actually necessary?
> 
> When you populate the TP_STRUCT__entry values, you can call a function to
> get information (e.g. see the hfi1 trace_ibhdrs.h usage of hfi1_trace_packet_hdr_len()).
> 
> You may be able to call trace_create_mad_addr() there (you will probably need
> to pass qp_info into trace_ib_mad_ib_send_mad()) rather than having to have
> this enable check in the code.


How much of an optimization is this?  Is it more for code cleanliness?  I don't
see how there would be much performance difference here.

If it is for code cleanliness, will you accept me cleaning it up later?

Ira

Steven Rostedt Feb. 13, 2019, 12:38 a.m. UTC | #4
On Tue, 12 Feb 2019 15:33:07 -0800
Ira Weiny <ira.weiny@intel.com> wrote:

> > >+#define CREATE_TRACE_POINTS
> > >+#include <trace/events/ib_mad.h>
> > >+
> > > static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
> > > static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
> > >
> > >@@ -1223,6 +1268,14 @@ int ib_send_mad(struct ib_mad_send_wr_private
> > >*mad_send_wr)
> > >
> > > 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
> > > 	if (qp_info->send_queue.count < qp_info->send_queue.max_active)
> > >{
> > >+		if (trace_ib_mad_ib_send_mad_enabled()) {
> > >+			struct rdma_mad_trace_addr addr;
> > >+
> > >+			trace_create_mad_addr(qp_info->port_priv->device,
> > >+					      qp_info->port_priv->port_num,
> > >+					      &mad_send_wr->send_wr,
> > >&addr);
> > >+			trace_ib_mad_ib_send_mad(mad_send_wr, &addr);
> > >+		}
> > > 		ret = ib_post_send(mad_agent->qp, &mad_send_wr-  
> > >>send_wr.wr,  
> > > 				   NULL);
> > > 		list = &qp_info->send_queue.list;  
> > 
> > Ira,
> > 
> > Are the trace_ib_mad_ib_send_mad_enabled() checks (and the ones at the other
> > locations) actually necessary?
> > 
> > When you populate the TP_STRUCT__entry values, you can call a function to
> > get information (e.g. see the hfi1 trace_ibhdrs.h usage of hfi1_trace_packet_hdr_len()).
> > 
> > You may be able to call trace_create_mad_addr() there (you will probably need
> > to pass qp_info into trace_ib_mad_ib_send_mad()) rather than having to have
> > this enable check in the code.
> 
> 
> How much of an optimization is this?  Is it more for code cleanliness?  I don't
> see how there would be much performance difference here.
> 
> If it is for code cleanliness will you accept me cleaning it up later?

It looks like "trace_create_mad_addr()" is not a trace event (I
would recommend renaming it to create_trace_mad_addr() to avoid the
confusion). That means it's a real function call that will get called
and perform work all the time, and we don't want that unless the
ib_mad_ib_send_mad trace event is enabled. Which shows that the
trace_ib_mad_ib_send_mad_enabled() check makes a significant difference.

-- Steve
Ira Weiny Feb. 13, 2019, 5:01 a.m. UTC | #5
> 
> On Tue, 12 Feb 2019 15:33:07 -0800
> Ira Weiny <ira.weiny@intel.com> wrote:
> 
> > > >+#define CREATE_TRACE_POINTS
> > > >+#include <trace/events/ib_mad.h>
> > > >+
> > > > static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int
> > > > mad_recvq_size = IB_MAD_QP_RECV_SIZE;
> > > >
> > > >@@ -1223,6 +1268,14 @@ int ib_send_mad(struct
> > > >ib_mad_send_wr_private
> > > >*mad_send_wr)
> > > >
> > > > 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
> > > > 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
> > > >+		if (trace_ib_mad_ib_send_mad_enabled()) {
> > > >+			struct rdma_mad_trace_addr addr;
> > > >+
> > > >+			trace_create_mad_addr(qp_info->port_priv->device,
> > > >+					      qp_info->port_priv->port_num,
> > > >+					      &mad_send_wr->send_wr,
> > > >&addr);
> > > >+			trace_ib_mad_ib_send_mad(mad_send_wr, &addr);
> > > >+		}
> > > > 		ret = ib_post_send(mad_agent->qp, &mad_send_wr-
> > > >>send_wr.wr,
> > > > 				   NULL);
> > > > 		list = &qp_info->send_queue.list;
> > >
> > > Ira,
> > >
> > > Are the trace_ib_mad_ib_send_mad_enabled() checks (and the ones at the
> > > other locations) actually necessary?
> > >
> > > When you populate the TP_STRUCT__entry values, you can call a
> > > function to get information (e.g. see the hfi1 trace_ibhdrs.h usage of
> > > hfi1_trace_packet_hdr_len()).
> > >
> > > You may be able to call trace_create_mad_addr() there (you will
> > > probably need to pass qp_info into trace_ib_mad_ib_send_mad()) rather
> > > than having to have this enable check in the code.
> >
> >
> > How much of an optimization is this?  Is it more for code cleanliness?
> > I don't see how there would be much performance difference here.
> >
> > If it is for code cleanliness will you accept me cleaning it up later?
> 
> It looks like "trace_create_mad_addr()" is not a trace event (I would
> recommend renaming it to create_trace_mad_addr() to avoid the confusion).
> That means it's a real function call that will get called and perform work all
> the time, and we don't want that unless the ib_mad_ib_send_mad trace
> event is enabled. Which shows that the
> trace_ib_mad_ib_send_mad_enabled() check makes a significant difference.

I agree.  But Mike is implying that there is a way to have trace_create_mad_addr() called from within the trace_ib_mad_ib_send_mad() trace point itself.  IMO, there is not much difference where the helper gets called when enabled.  (probably a bit more efficient within the trace point but likely not much).

When the trace is disabled there is not a significant performance penalty.  Therefore, what Mike is suggesting simply makes the code a bit cleaner.  But I may not understand some magic within tracing.

If we can agree we are not looking at a bad performance hit here, I propose we accept the patches as is and I will clean it up later (including making the function name clearer).

Any objections?

Ira
Michael J. Ruhl Feb. 13, 2019, 12:55 p.m. UTC | #6
>-----Original Message-----
>From: Weiny, Ira
>Sent: Wednesday, February 13, 2019 12:02 AM
>To: Steven Rostedt <rostedt@goodmis.org>
>Cc: Ruhl, Michael J <michael.j.ruhl@intel.com>; linux-rdma@vger.kernel.org;
>Hal Rosenstock <hal@dev.mellanox.co.il>; Doug Ledford
><dledford@redhat.com>; Jason Gunthorpe <jgg@ziepe.ca>; Ingo Molnar
><mingo@redhat.com>; Alexei Starovoitov <ast@kernel.org>; Daniel
>Borkmann <daniel@iogearbox.net>; Leon Romanovsky <leon@kernel.org>
>Subject: RE: [PATCH for-next V4 1/6] IB/MAD: Add send path trace points
>
>>
>> On Tue, 12 Feb 2019 15:33:07 -0800
>> Ira Weiny <ira.weiny@intel.com> wrote:
>>
>> > > >+#define CREATE_TRACE_POINTS
>> > > >+#include <trace/events/ib_mad.h>
>> > > >+
>> > > > static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int
>> > > > mad_recvq_size = IB_MAD_QP_RECV_SIZE;
>> > > >
>> > > >@@ -1223,6 +1268,14 @@ int ib_send_mad(struct
>> > > >ib_mad_send_wr_private
>> > > >*mad_send_wr)
>> > > >
>> > > > 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
>> > > > 	if (qp_info->send_queue.count < qp_info->send_queue.max_active)
>{
>> > > >+		if (trace_ib_mad_ib_send_mad_enabled()) {
>> > > >+			struct rdma_mad_trace_addr addr;
>> > > >+
>> > > >+			trace_create_mad_addr(qp_info->port_priv-
>>device,
>> > > >+					      qp_info->port_priv-
>>port_num,
>> > > >+					      &mad_send_wr->send_wr,
>> > > >&addr);
>> > > >+			trace_ib_mad_ib_send_mad(mad_send_wr,
>&addr);
>> > > >+		}
>> > > > 		ret = ib_post_send(mad_agent->qp, &mad_send_wr-
>> > > >>send_wr.wr,
>> > > > 				   NULL);
>> > > > 		list = &qp_info->send_queue.list;
>> > >
>> > > Ira,
>> > >
>> > > Are the trace_ib_mad_ib_send_mad_enabled() checks (and the ones at
>> > > the other locations) actually necessary?
>> > >
>> > > When you populate the TP_STRUCT__entry values, you can call a
>> > > function to get information (e.g. see the hfi1 trace_ibhdrs.h usage of
>> > > hfi1_trace_packet_hdr_len()).
>> > >
>> > > You may be able to call trace_create_mad_addr() there (you will
>> > > probably need to pass qp_info into trace_ib_mad_ib_send_mad()) rather
>> > > than having to have this enable check in the code.
>> >
>> >
>> > How much of an optimization is this?  Is it more for code cleanliness?
>> > I don't see how there would be much performance difference here.
>> >
>> > If it is for code cleanliness will you accept me cleaning it up later?
>>
>> It looks like "trace_create_mad_addr()" is not a trace event (I would
>> recommend renaming it to create_trace_mad_addr() to avoid the confusion).
>> That means it's a real function call that will get called and perform work all
>> the time, and we don't want that unless the ib_mad_ib_send_mad trace
>> event is enabled. Which shows that the
>> trace_ib_mad_ib_send_mad_enabled() check makes a significant difference.
>
>I agree.  But Mike is implying that there is a way to have
>trace_create_mad_addr() called from within the
>trace_ib_mad_ib_send_mad() trace point itself.  IMO, there is not much
>difference where the helper gets called when enabled.  (probably a bit more
>efficient within the trace point but likely not much).
>
>When the trace is disabled there is not a significant performance penalty.
>Therefore, what Mike is suggesting simply makes the code a bit cleaner.  But I
>may not understand some magic within tracing.

This is for performance and cleanup.  If the trace_create_mad_addr() function can be
incorporated in the entry portion of the trace, then the explicit if trace_enabled()
portion would go away.

I haven't looked at the assembly, but I think that the if (trace_enabled) will
actually be in the code path regardless of trace being enabled or not.  With
just the trace_xxx() in the code path, it will not be present unless trace is
enabled (i.e. no if check, no performance cost).

>If we can agree we are not looking at a bad performance hit here, I propose
>we accept the patches as is and I will clean it up later (including making the
>function name clearer).
>
>Any objections?

Nope.  I wasn't sure if the added if statement had been done on purpose or not. :)

If performance is not an issue for this path, a follow up patch is fine with me.

Mike
 
>Ira
Steven Rostedt Feb. 13, 2019, 1:33 p.m. UTC | #7
On Wed, 13 Feb 2019 12:55:25 +0000
"Ruhl, Michael J" <michael.j.ruhl@intel.com> wrote:

> >I agree.  But Mike is implying that there is a way to have
> >trace_create_mad_addr() called from within the
> >trace_ib_mad_ib_send_mad() trace point itself.  IMO, there is not much

Yeah, it should be possible to just pass in qp_info as well to the
tracepoint and do the processing in the TP_fast_assign() portion.

> >difference where the helper gets called when enabled.  (probably a bit more
> >efficient within the trace point but likely not much).
> >
> >When the trace is disabled there is not a significant performance penalty.
> >Therefore, what Mike is suggesting simply makes the code a bit cleaner.  But I
> >may not understand some magic within tracing.  
> 
> This is for performance and cleanup.  If the trace_create_mad_addr() function can be
> incorporated in the entry portion of the trace, then the explicit if trace_enabled()
> portion would go away.

Note, the trace_*_enabled() check is implemented with jump labels, thus there
is no compare operation in the assembly. It's a simple nop that gets
converted into an unconditional jump when enabled. The performance
gained by removing the if trace_*_enabled() check is therefore negligible.
Unless of course it is run on an arch that doesn't support jump labels,
but that would have other performance issues as well.

> 
> I haven't looked at the assembly, but I think that the if (trace_enabled) will 
> actually be in the code path regardless of trace being enabled or not.

It shouldn't be. The jump label if block is set as an unlikely, and gcc
should know enough to move that code to the end of the function (out of
the hot path). Again, the only thing that the assembly should see is
a single nop.

> With just the trace_xxx() in the code path, it will not be present unless
> trace is enabled (i.e. no if check, no performance cost).

Again, there is no "if check".
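
For reference, a rough sketch of the jump-label pattern being described (the
key and function names below are invented for illustration and are not from
the MAD code):

#include <linux/jump_label.h>
#include <linux/printk.h>

static DEFINE_STATIC_KEY_FALSE(example_trace_key);

static void example_hot_path(void)
{
	/*
	 * Compiles to a single nop at this site; the out-of-line block is
	 * only reached after the key is enabled and the nop is patched
	 * into a jump.
	 */
	if (static_branch_unlikely(&example_trace_key))
		pr_info("tracing-only work runs here\n");
}

static void example_enable(void)
{
	static_branch_enable(&example_trace_key);	/* nop -> jmp */
}

The generated trace_*_enabled() helpers and the tracepoints themselves are
built on this same mechanism, which is why the extra check costs essentially
nothing while tracing is off.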

> 
> >If we can agree we are not looking at a bad performance hit here.  I propose
> >we accept the patches as is and I will clean it up later.  (Including making the
> >function name more clear.)
> >
> >Any objections?  
> 
> Nope.  I wasn't sure if added if statement had been done on purpose or not. :)
> 
> If performance is not an issue for this path, a follow up patch is fine with me.

-- Steve

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 8c68de3cfd80..9254bd40f1ae 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7491,6 +7491,7 @@  F:	drivers/infiniband/
 F:	include/uapi/linux/if_infiniband.h
 F:	include/uapi/rdma/
 F:	include/rdma/
+F:	include/trace/events/ib_mad.h
 
 INGENIC JZ4780 DMA Driver
 M:	Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 7870823bac47..e166d584b80c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -3,7 +3,7 @@ 
  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
  * Copyright (c) 2009 HNR Consulting. All rights reserved.
- * Copyright (c) 2014 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2014,2018 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -51,6 +51,51 @@ 
 #include "opa_smi.h"
 #include "agent.h"
 
+struct rdma_mad_trace_addr {
+	u32 dlid;
+	struct roce_ah_attr roce_addr;
+	u8 sl;
+	u16 pkey;
+	u32 rqpn;
+	u32 rqkey;
+};
+static void trace_create_mad_addr(struct ib_device *dev, u8 pnum,
+				  struct ib_ud_wr *wr,
+				  struct rdma_mad_trace_addr *addr)
+{
+	struct rdma_ah_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	rdma_query_ah(wr->ah, &attr);
+
+	/* These are common */
+	addr->sl = attr.sl;
+	ib_query_pkey(dev, pnum, wr->pkey_index, &addr->pkey);
+	addr->rqpn = wr->remote_qpn;
+	addr->rqkey = wr->remote_qkey;
+
+	switch (attr.type) {
+	case RDMA_AH_ATTR_TYPE_IB:
+		addr->dlid = attr.ib.dlid;
+		memset(&addr->roce_addr, 0, sizeof(addr->roce_addr));
+		break;
+	case RDMA_AH_ATTR_TYPE_OPA:
+		addr->dlid = attr.opa.dlid;
+		memset(&addr->roce_addr, 0, sizeof(addr->roce_addr));
+		break;
+	case RDMA_AH_ATTR_TYPE_ROCE:
+		addr->dlid = 0;
+		memcpy(&addr->roce_addr, &attr.roce, sizeof(addr->roce_addr));
+		break;
+	case RDMA_AH_ATTR_TYPE_UNDEFINED:
+		addr->dlid = 0;
+		memset(&addr->roce_addr, 0, sizeof(addr->roce_addr));
+		break;
+	}
+}
+#define CREATE_TRACE_POINTS
+#include <trace/events/ib_mad.h>
+
 static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
 static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
 
@@ -1223,6 +1268,14 @@  int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
+		if (trace_ib_mad_ib_send_mad_enabled()) {
+			struct rdma_mad_trace_addr addr;
+
+			trace_create_mad_addr(qp_info->port_priv->device,
+					      qp_info->port_priv->port_num,
+					      &mad_send_wr->send_wr, &addr);
+			trace_ib_mad_ib_send_mad(mad_send_wr, &addr);
+		}
 		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
 				   NULL);
 		list = &qp_info->send_queue.list;
@@ -2496,6 +2549,8 @@  static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
 	send_queue = mad_list->mad_queue;
 	qp_info = send_queue->qp_info;
 
+	trace_ib_mad_send_done_handler(mad_send_wr, wc);
+
 retry:
 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
 			    mad_send_wr->header_mapping,
@@ -2527,6 +2582,14 @@  static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
 	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
 
 	if (queued_send_wr) {
+		if (trace_ib_mad_send_done_resend_enabled()) {
+			struct rdma_mad_trace_addr addr;
+
+			trace_create_mad_addr(qp_info->port_priv->device,
+					      qp_info->port_priv->port_num,
+					      &mad_send_wr->send_wr, &addr);
+			trace_ib_mad_send_done_resend(queued_send_wr, &addr);
+		}
 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
 				   NULL);
 		if (ret) {
@@ -2574,6 +2637,14 @@  static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
 		if (mad_send_wr->retry) {
 			/* Repost send */
 			mad_send_wr->retry = 0;
+			if (trace_ib_mad_error_handler_enabled()) {
+				struct rdma_mad_trace_addr addr;
+
+				trace_create_mad_addr(qp_info->port_priv->device,
+						      qp_info->port_priv->port_num,
+						      &mad_send_wr->send_wr, &addr);
+				trace_ib_mad_error_handler(mad_send_wr, &addr);
+			}
 			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
 					   NULL);
 			if (!ret)
diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h
new file mode 100644
index 000000000000..30f72a29cccb
--- /dev/null
+++ b/include/trace/events/ib_mad.h
@@ -0,0 +1,164 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+
+/*
+ * Copyright (c) 2018 Intel Corporation.  All rights reserved.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_mad
+
+#if !defined(_TRACE_IB_MAD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IB_MAD_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_mad.h>
+
+DECLARE_EVENT_CLASS(ib_mad_send_template,
+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct rdma_mad_trace_addr *addr),
+	TP_ARGS(wr, addr),
+
+	TP_STRUCT__entry(
+		__field(u8,             base_version)
+		__field(u8,             mgmt_class)
+		__field(u8,             class_version)
+		__field(u8,             port_num)
+		__field(u32,            qp_num)
+		__field(u8,             method)
+		__field(u8,             sl)
+		__field(u16,            attr_id)
+		__field(u32,            attr_mod)
+		__field(u64,            wrtid)
+		__field(u64,            tid)
+		__field(u16,            status)
+		__field(u16,            class_specific)
+		__field(u32,            length)
+		__field(u32,            dlid)
+		__field(u32,            rqpn)
+		__field(u32,            rqkey)
+		__field(u32,            dev_index)
+		__field(void *,         agent_priv)
+		__field(unsigned long,  timeout)
+		__field(int,            retries_left)
+		__field(int,            max_retries)
+		__field(int,            retry)
+		__field(u16,            pkey)
+	),
+
+	TP_fast_assign(
+		__entry->dev_index = wr->mad_agent_priv->agent.device->index;
+		__entry->port_num = wr->mad_agent_priv->agent.port_num;
+		__entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num;
+		__entry->agent_priv = wr->mad_agent_priv;
+		__entry->wrtid = wr->tid;
+		__entry->max_retries = wr->max_retries;
+		__entry->retries_left = wr->retries_left;
+		__entry->retry = wr->retry;
+		__entry->timeout = wr->timeout;
+		__entry->length = wr->send_buf.hdr_len +
+				  wr->send_buf.data_len;
+		__entry->base_version = ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version;
+		__entry->mgmt_class = ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class;
+		__entry->class_version = ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version;
+		__entry->method = ((struct ib_mad_hdr *)wr->send_buf.mad)->method;
+		__entry->status = ((struct ib_mad_hdr *)wr->send_buf.mad)->status;
+		__entry->class_specific = ((struct ib_mad_hdr *)wr->send_buf.mad)->class_specific;
+		__entry->tid = ((struct ib_mad_hdr *)wr->send_buf.mad)->tid;
+		__entry->attr_id = ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_id;
+		__entry->attr_mod = ((struct ib_mad_hdr *)wr->send_buf.mad)->attr_mod;
+		__entry->dlid = addr->dlid;
+		__entry->sl = addr->sl;
+		__entry->pkey = addr->pkey;
+		__entry->rqpn = addr->rqpn;
+		__entry->rqkey = addr->rqkey;
+	),
+
+	TP_printk("%d:%d QP%d agent %p: " \
+		  "wrtid 0x%llx; %d/%d retries(%d); timeout %lu length %d : hdr : " \
+		  "base_ver 0x%x class 0x%x class_ver 0x%x method 0x%x " \
+		  "status 0x%x class_specific 0x%x tid 0x%llx attr_id 0x%x attr_mod 0x%x " \
+		  " => dlid 0x%08x sl %d pkey 0x%x rpqn 0x%x rqpkey 0x%x",
+		__entry->dev_index, __entry->port_num, __entry->qp_num,
+		__entry->agent_priv, be64_to_cpu(__entry->wrtid),
+		__entry->retries_left, __entry->max_retries,
+		__entry->retry, __entry->timeout, __entry->length,
+		__entry->base_version, __entry->mgmt_class, __entry->class_version,
+		__entry->method, be16_to_cpu(__entry->status),
+		be16_to_cpu(__entry->class_specific),
+		be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
+		be32_to_cpu(__entry->attr_mod),
+		be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey, __entry->rqpn,
+		__entry->rqkey
+	)
+);
+
+DEFINE_EVENT(ib_mad_send_template, ib_mad_error_handler,
+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct rdma_mad_trace_addr *addr),
+	TP_ARGS(wr, addr));
+DEFINE_EVENT(ib_mad_send_template, ib_mad_ib_send_mad,
+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct rdma_mad_trace_addr *addr),
+	TP_ARGS(wr, addr));
+DEFINE_EVENT(ib_mad_send_template, ib_mad_send_done_resend,
+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct rdma_mad_trace_addr *addr),
+	TP_ARGS(wr, addr));
+
+TRACE_EVENT(ib_mad_send_done_handler,
+	TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_wc *wc),
+	TP_ARGS(wr, wc),
+
+	TP_STRUCT__entry(
+		__field(u8,             port_num)
+		__field(u8,             base_version)
+		__field(u8,             mgmt_class)
+		__field(u8,             class_version)
+		__field(u32,            qp_num)
+		__field(u64,            wrtid)
+		__field(u16,            status)
+		__field(u16,            wc_status)
+		__field(u32,            length)
+		__field(void *,         agent_priv)
+		__field(unsigned long,  timeout)
+		__field(u32,            dev_index)
+		__field(int,            retries_left)
+		__field(int,            max_retries)
+		__field(int,            retry)
+		__field(u8,             method)
+	),
+
+	TP_fast_assign(
+		__entry->dev_index = wr->mad_agent_priv->agent.device->index;
+		__entry->port_num = wr->mad_agent_priv->agent.port_num;
+		__entry->qp_num = wr->mad_agent_priv->qp_info->qp->qp_num;
+		__entry->agent_priv = wr->mad_agent_priv;
+		__entry->wrtid = wr->tid;
+		__entry->max_retries = wr->max_retries;
+		__entry->retries_left = wr->retries_left;
+		__entry->retry = wr->retry;
+		__entry->timeout = wr->timeout;
+		__entry->base_version = ((struct ib_mad_hdr *)wr->send_buf.mad)->base_version;
+		__entry->mgmt_class = ((struct ib_mad_hdr *)wr->send_buf.mad)->mgmt_class;
+		__entry->class_version = ((struct ib_mad_hdr *)wr->send_buf.mad)->class_version;
+		__entry->method = ((struct ib_mad_hdr *)wr->send_buf.mad)->method;
+		__entry->status = ((struct ib_mad_hdr *)wr->send_buf.mad)->status;
+		__entry->wc_status = wc->status;
+		__entry->length = wc->byte_len;
+	),
+
+	TP_printk("%d:%d QP%d : SEND WC Status %d : agent %p: " \
+		  "wrtid 0x%llx %d/%d retries(%d) timeout %lu length %d: hdr : " \
+		  "base_ver 0x%x class 0x%x class_ver 0x%x method 0x%x " \
+		  "status 0x%x",
+		__entry->dev_index, __entry->port_num, __entry->qp_num,
+		__entry->wc_status,
+		__entry->agent_priv, be64_to_cpu(__entry->wrtid),
+		__entry->retries_left, __entry->max_retries,
+		__entry->retry, __entry->timeout,
+		__entry->length,
+		__entry->base_version, __entry->mgmt_class, __entry->class_version,
+		__entry->method, be16_to_cpu(__entry->status)
+	)
+);
+
+
+#endif /* _TRACE_IB_MAD_H */
+
+#include <trace/define_trace.h>
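
Once applied, the new events are grouped under the "ib_mad" trace system.  A
hedged usage sketch for enabling them from userspace is shown below; it
assumes tracefs is mounted at /sys/kernel/tracing (older setups may use
/sys/kernel/debug/tracing instead):

#include <stdio.h>

int main(void)
{
	/* Enable every ib_mad event; write "0" to the same file to disable. */
	FILE *f = fopen("/sys/kernel/tracing/events/ib_mad/enable", "w");

	if (!f) {
		perror("ib_mad/enable");
		return 1;
	}
	fputs("1\n", f);
	fclose(f);

	/* Emitted records then appear in /sys/kernel/tracing/trace. */
	return 0;
}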