diff mbox series

xprtrdma: Prevent leak of rpcrdma_rep objects

Message ID 20181207160645.5494.42006.stgit@manet.1015granger.net (mailing list archive)
State Accepted
Commit 07e10308ee5da8e6132e0b737ece1c99dd651fb6
Headers show
Series xprtrdma: Prevent leak of rpcrdma_rep objects | expand

Commit Message

Chuck Lever Dec. 7, 2018, 4:11 p.m. UTC
If a reply has been processed but the RPC is later retransmitted
anyway, the req->rl_reply field still contains the only pointer to
the old rpcrdma rep. When the next reply comes in, the reply handler
will stomp on the rl_reply field, leaking the old rep.

A trace event is added to capture such leaks.

This problem seems to be worsened by the restructuring of the RPC
Call path in v4.20. Fully addressing this issue will require at
least a re-architecture of the disconnect logic, which is not
appropriate during -rc.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
Hi Anna-

Would you consider this fix for v4.20-rc please?


 include/trace/events/rpcrdma.h |   28 ++++++++++++++++++++++++++++
 net/sunrpc/xprtrdma/rpc_rdma.c |    4 ++++
 2 files changed, 32 insertions(+)
diff mbox series

Patch

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index b093058..602972d 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -917,6 +917,34 @@ 
 DEFINE_CB_EVENT(xprtrdma_cb_call);
 DEFINE_CB_EVENT(xprtrdma_cb_reply);
 
+TRACE_EVENT(xprtrdma_leaked_rep,
+	TP_PROTO(
+		const struct rpc_rqst *rqst,
+		const struct rpcrdma_rep *rep
+	),
+
+	TP_ARGS(rqst, rep),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+		__field(const void *, rep)
+	),
+
+	TP_fast_assign(
+		__entry->task_id = rqst->rq_task->tk_pid;
+		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
+		__entry->xid = be32_to_cpu(rqst->rq_xid);
+		__entry->rep = rep;
+	),
+
+	TP_printk("task:%u@%u xid=0x%08x rep=%p",
+		__entry->task_id, __entry->client_id, __entry->xid,
+		__entry->rep
+	)
+);
+
 /**
  ** Server-side RPC/RDMA events
  **/
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 9f53e02..a2eb647 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1356,6 +1356,10 @@  void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	}
 
 	req = rpcr_to_rdmar(rqst);
+	if (req->rl_reply) {
+		trace_xprtrdma_leaked_rep(rqst, req->rl_reply);
+		rpcrdma_recv_buffer_put(req->rl_reply);
+	}
 	req->rl_reply = rep;
 	rep->rr_rqst = rqst;
 	clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);