diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -336,6 +336,44 @@
), \
TP_ARGS(rqst))
+DECLARE_EVENT_CLASS(xdr_buf_class,
+ TP_PROTO(
+ const struct xdr_buf *xdr
+ ),
+
+ TP_ARGS(xdr),
+
+ TP_STRUCT__entry(
+ __field(const void *, head_base)
+ __field(size_t, head_len)
+ __field(const void *, tail_base)
+ __field(size_t, tail_len)
+ __field(unsigned int, page_len)
+ __field(unsigned int, msg_len)
+ ),
+
+ TP_fast_assign(
+ __entry->head_base = xdr->head[0].iov_base;
+ __entry->head_len = xdr->head[0].iov_len;
+ __entry->tail_base = xdr->tail[0].iov_base;
+ __entry->tail_len = xdr->tail[0].iov_len;
+ __entry->page_len = xdr->page_len;
+ __entry->msg_len = xdr->len;
+ ),
+
+ TP_printk("head=[%p,%zu] page=%u tail=[%p,%zu] len=%u",
+ __entry->head_base, __entry->head_len, __entry->page_len,
+ __entry->tail_base, __entry->tail_len, __entry->msg_len
+ )
+);
+
+#define DEFINE_XDRBUF_EVENT(name) \
+ DEFINE_EVENT(xdr_buf_class, name, \
+ TP_PROTO( \
+ const struct xdr_buf *xdr \
+ ), \
+ TP_ARGS(xdr))
+
/**
** Connection events
**/
@@ -1634,6 +1672,8 @@
)
);
+DEFINE_XDRBUF_EVENT(svcrdma_send_pullup);
+
TRACE_EVENT(svcrdma_send_failed,
TP_PROTO(
const struct svc_rqst *rqst,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -537,16 +537,32 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma,
DMA_TO_DEVICE);
}
-/* If the xdr_buf has more elements than the device can
- * transmit in a single RDMA Send, then the reply will
- * have to be copied into a bounce buffer.
+/**
+ * svc_rdma_pull_up_needed - Determine whether to use pull-up
+ * @rdma: controlling transport
+ * @ctxt: I/O resources for an RDMA Send
+ * @xdr: xdr_buf containing RPC message to transmit
+ * @wr_lst: pointer to start of Write chunk list
+ *
+ * Returns:
+ * %true if pull-up should be used
+ * %false otherwise
*/
static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt,
struct xdr_buf *xdr,
__be32 *wr_lst)
{
int elements;
+ /* Avoid the overhead of DMA mapping for small messages.
+ */
+ if (xdr->len < RPCRDMA_V1_DEF_INLINE_SIZE >> 1)
+ return true;
+
+ /* Check whether the xdr_buf has more elements than can
+ * fit in a single RDMA Send.
+ */
/* xdr->head */
elements = 1;
@@ -627,6 +643,7 @@ static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
ctxt->sc_sges[0].length,
DMA_TO_DEVICE);
+ trace_svcrdma_send_pullup(xdr);
return 0;
}
@@ -652,7 +669,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
u32 xdr_pad;
int ret;
- if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst))
+ if (svc_rdma_pull_up_needed(rdma, ctxt, xdr, wr_lst))
return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst);
++ctxt->sc_cur_sge_no;
On some platforms, DMA mapping part of a page is more costly than
copying bytes. Indeed, not involving the I/O MMU can help the RPC/RDMA
transport scale better for tiny I/Os across more RDMA devices. This is
because interaction with the I/O MMU is eliminated for each of these
small I/Os. Without the explicit unmapping, the NIC no longer needs to
do a costly internal TLB shootdown for buffers that are just a handful
of bytes.

The heuristic for now is to pull up when the size of the RPC message
body is smaller than half the minimum Send buffer size.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/trace/events/rpcrdma.h        | 40 +++++++++++++++++++++++++++++++++
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 25 +++++++++++++++++----
 2 files changed, 61 insertions(+), 4 deletions(-)
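To make the threshold concrete, here is a minimal userspace sketch of
the pull-up heuristic above; it is illustration only, not kernel code.
It assumes RPCRDMA_V1_DEF_INLINE_SIZE is 4096 bytes, its value in
include/linux/sunrpc/rpc_rdma.h, and struct xdr_buf_sketch,
pull_up_needed(), and the sample message lengths are hypothetical
stand-ins.

/* Sketch of the svc_rdma_pull_up_needed() size check, under the
 * assumptions stated above.
 */
#include <stdbool.h>
#include <stdio.h>

#define RPCRDMA_V1_DEF_INLINE_SIZE 4096	/* minimum Send buffer size */

struct xdr_buf_sketch {
	unsigned int len;	/* total length of the RPC message body */
};

/* Pull the message up into a single Send buffer when it is smaller
 * than half the minimum Send buffer size (2048 bytes here). The
 * kernel code writes the comparison without parentheses; that is
 * equivalent, since >> binds more tightly than < in C.
 */
static bool pull_up_needed(const struct xdr_buf_sketch *xdr)
{
	return xdr->len < (RPCRDMA_V1_DEF_INLINE_SIZE >> 1);
}

int main(void)
{
	struct xdr_buf_sketch tiny = { .len = 100 };	/* small reply */
	struct xdr_buf_sketch large = { .len = 8192 };	/* large reply */

	printf("tiny (100B): pull-up=%d\n", pull_up_needed(&tiny));
	printf("large (8192B): pull-up=%d (DMA-map instead)\n",
	       pull_up_needed(&large));
	return 0;
}

Once the patch is applied, how often the heuristic fires can be
observed at runtime, for example by enabling the new
svcrdma_send_pullup tracepoint under events/rpcrdma/ in the kernel's
tracefs mount.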