@@ -195,6 +195,10 @@ extern struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
+extern int svc_rdma_skip_payloads(const struct xdr_buf *xdr,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ int (*actor)(const struct xdr_buf *, void *),
+ void *data);
extern int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr);
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
@@ -515,6 +515,78 @@ svc_rdma_encode_reply_chunk(const struct svc_rdma_recv_ctxt *rctxt,
return svc_rdma_encode_write_chunk(sctxt, &payload);
}

+static inline int
+xdr_buf_process_region(const struct xdr_buf *xdr,
+ unsigned int offset, unsigned int length,
+ int (*actor)(const struct xdr_buf *, void *),
+ void *data)
+{
+ struct xdr_buf subbuf;
+
+ if (!length)
+ return 0;
+ if (xdr_buf_subsegment(xdr, &subbuf, offset, length))
+ return -EMSGSIZE;
+ return actor(&subbuf, data);
+}
+
+/**
+ * svc_rdma_skip_payloads - Call an actor for non-payload regions of @xdr
+ * @xdr: xdr_buf to process
+ * @rctxt: Write and Reply chunks provided by client
+ * @actor: function to invoke on each non-payload region
+ * @data: pointer to arguments for @actor
+ *
+ * This mechanism must ignore not only READ payloads that were already
+ * sent via RDMA Write, but also any XDR padding the upper layer has
+ * added for those payloads.
+ *
+ * Assumptions:
+ *   @xdr->len and the rp_offset fields are multiples of 4.
+ *
+ * Returns:
+ *   %0 on success,
+ *   %-EMSGSIZE on XDR buffer overflow, or
+ *   the first negative value returned by @actor
+int svc_rdma_skip_payloads(const struct xdr_buf *xdr,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ int (*actor)(const struct xdr_buf *, void *),
+ void *data)
+{
+ const unsigned int num_payloads = rctxt ? rctxt->rc_cur_payload : 0;
+ unsigned int offset, length;
+ int i, ret;
+
+ if (likely(!num_payloads))
+ return actor(xdr, data);
+
+ /* Before the first READ payload */
+ offset = 0;
+ length = rctxt->rc_read_payloads[0].rp_offset;
+ ret = xdr_buf_process_region(xdr, offset, length, actor, data);
+ if (ret < 0)
+ return ret;
+
+ /* Any middle READ payloads */
+ for (i = 0; i + 1 < num_payloads; i++) {
+ offset = xdr_align_size(offset + length + rctxt->rc_read_payloads[i].rp_length);
+ length = rctxt->rc_read_payloads[i + 1].rp_offset - offset;
+ ret = xdr_buf_process_region(xdr, offset, length, actor, data);
+ if (ret < 0)
+ return ret;
+ }
+
+ /* After the last READ payload */
+ offset = xdr_align_size(offset + length + rctxt->rc_read_payloads[i].rp_length);
+ length = xdr->len - offset;
+ ret = xdr_buf_process_region(xdr, offset, length, actor, data);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt,
struct page *page,
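To make the skip arithmetic concrete, here is a worked example. It is an editor's illustration with invented offsets, not part of the patch:

/*
 * Illustration only, not part of the patch. One READ payload recorded
 * at rp_offset = 8, rp_length = 5, in an xdr_buf with xdr->len = 40:
 *
 *   before the payload:  offset 0,  length 8
 *   the payload itself:  bytes 8..12, plus pad bytes 13..15,
 *                        since xdr_align_size(8 + 5) == 16
 *   after the payload:   offset 16, length 40 - 16 == 24
 *
 * svc_rdma_skip_payloads() therefore invokes @actor twice, on
 * subsegments covering bytes [0, 8) and [16, 40).
 */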
We'll need a generic mechanism for processing only the parts of an
egress RPC message that are _not_ a READ payload. This will be used
in subsequent patches.

This is a separate patch to reduce the complexity of subsequent
patches, so that the logic of this new mechanism can be separately
reviewed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h       |  4 ++
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 72 +++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
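As review context, here is a minimal sketch of how a subsequent patch might drive the new helper. The actor svc_rdma_xb_count() and the wrapper svc_rdma_nonpayload_length() are hypothetical names invented for this illustration; only svc_rdma_skip_payloads() and its signature come from the patch itself:

/* Hypothetical actor: accumulate the number of bytes that must still
 * be sent inline, i.e. everything that is not a READ payload.
 */
static int svc_rdma_xb_count(const struct xdr_buf *xdr, void *data)
{
	unsigned int *remaining = data;

	*remaining += xdr->len;
	return 0;
}

/* Hypothetical call site: @xdr is the marshaled RPC Reply, @rctxt
 * records the READ payloads already pushed via RDMA Write. Negative
 * returns from svc_rdma_skip_payloads() are passed through unchanged.
 */
static int svc_rdma_nonpayload_length(const struct xdr_buf *xdr,
				      const struct svc_rdma_recv_ctxt *rctxt,
				      unsigned int *remaining)
{
	*remaining = 0;
	return svc_rdma_skip_payloads(xdr, rctxt, svc_rdma_xb_count,
				      remaining);
}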