
[rdma-next,09/13] RDMA/mana_ib: UD/GSI work requests

Message ID: 1737394039-28772-10-git-send-email-kotaranov@linux.microsoft.com
State: New
Delegated to: Netdev Maintainers
Series: RDMA/mana_ib: Enable CM for mana_ib

Checks

Context | Check | Description
netdev/series_format | success | Posting correctly formatted
netdev/tree_selection | success | Guessed tree name to be net-next
netdev/ynl | success | Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present | success | Fixes tag not required for -next series
netdev/header_inline | success | No static functions without inline keyword in header files
netdev/build_32bit | success | Errors and warnings before: 0 this patch: 0
netdev/build_tools | success | No tools touched, skip
netdev/cc_maintainers | warning | 1 maintainers not CCed: andrew+netdev@lunn.ch
netdev/build_clang | fail | Errors and warnings before: 51 this patch: 54
netdev/verify_signedoff | success | Signed-off-by tag matches author and committer
netdev/deprecated_api | success | None detected
netdev/check_selftest | success | No net selftest shell script
netdev/verify_fixes | success | No Fixes tag
netdev/build_allmodconfig_warn | fail | Errors and warnings before: 14 this patch: 16
netdev/checkpatch | warning | WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/build_clang_rust | success | No Rust files in patch. Skipping build
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0
netdev/source_inline | success | Was 0 now: 0

Commit Message

Konstantin Taranov Jan. 20, 2025, 5:27 p.m. UTC
From: Konstantin Taranov <kotaranov@microsoft.com>

Implement the post_send and post_recv verbs for UD/GSI QPs.
Record information about posted work requests in shadow queues.

Co-developed-by: Shiraz Saleem <shirazsaleem@microsoft.com>
Signed-off-by: Shiraz Saleem <shirazsaleem@microsoft.com>
Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
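
For orientation, the new verbs are reached through the normal kernel RDMA API. A minimal, illustrative caller-side sketch (not part of this patch; the QP, AH, DMA-mapped buffer and lkey are assumed to already exist, and the function name is made up):

#include <rdma/ib_verbs.h>

/* Illustrative only: post a single signaled UD send on a kernel QP.
 * This dispatches into mana_ib_post_send() -> mana_ib_post_send_ud() below.
 */
static int example_ud_send(struct ib_qp *qp, struct ib_ah *ah,
			   u64 dma_addr, u32 len, u32 lkey,
			   u32 remote_qpn, u32 remote_qkey)
{
	struct ib_sge sge = {
		.addr	= dma_addr,
		.length	= len,
		.lkey	= lkey,
	};
	struct ib_ud_wr wr = {
		.wr = {
			.wr_id		= 1,
			.opcode		= IB_WR_SEND,	/* only opcode accepted by this driver */
			.send_flags	= IB_SEND_SIGNALED,
			.sg_list	= &sge,
			.num_sge	= 1,
		},
		.ah		= ah,
		.remote_qpn	= remote_qpn,
		.remote_qkey	= remote_qkey,
	};
	const struct ib_send_wr *bad_wr;

	return ib_post_send(qp, &wr.wr, &bad_wr);
}

On the MANA side, the driver prepends the address handle as an extra SGE and carries the UD metadata in an rdma_send_oob inline OOB before handing the WQE to the GDMA layer.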
---
 drivers/infiniband/hw/mana/Makefile           |   2 +-
 drivers/infiniband/hw/mana/device.c           |   2 +
 drivers/infiniband/hw/mana/mana_ib.h          |  33 ++++
 drivers/infiniband/hw/mana/qp.c               |  21 ++-
 drivers/infiniband/hw/mana/shadow_queue.h     | 115 ++++++++++++
 drivers/infiniband/hw/mana/wr.c               | 168 ++++++++++++++++++
 .../net/ethernet/microsoft/mana/gdma_main.c   |   2 +
 7 files changed, 341 insertions(+), 2 deletions(-)
 create mode 100644 drivers/infiniband/hw/mana/shadow_queue.h
 create mode 100644 drivers/infiniband/hw/mana/wr.c
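
The shadow queues introduced here only record posted work requests; the completion path that drains them arrives later in the series. A rough, hypothetical sketch of the intended consumer pattern (only the helpers from shadow_queue.h are taken from this patch; the function name and error mapping are assumptions):

/* Hypothetical poll-side consumer of a shadow queue; the real CQ polling
 * code is added in a later patch of this series.
 */
static int example_pop_shadow_wc(struct shadow_queue *queue, struct ib_wc *wc)
{
	const struct shadow_wqe_header *hdr;

	hdr = shadow_queue_get_next_to_consume(queue);
	if (!hdr)
		return 0;	/* nothing marked complete by the CQ yet */

	wc->wr_id  = hdr->wr_id;
	wc->opcode = hdr->opcode;	/* IB_WC_SEND or IB_WC_RECV */
	wc->status = hdr->error_code ? IB_WC_GENERAL_ERR : IB_WC_SUCCESS;

	shadow_queue_advance_consumer(queue);
	return 1;	/* one work completion produced */
}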

Patch

diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
index 6e56f77..79426e7 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@ 
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
 
-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index d534ef1..1da86c3 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -40,6 +40,8 @@  static const struct ib_device_ops mana_ib_dev_ops = {
 	.mmap = mana_ib_mmap,
 	.modify_qp = mana_ib_modify_qp,
 	.modify_wq = mana_ib_modify_wq,
+	.post_recv = mana_ib_post_recv,
+	.post_send = mana_ib_post_send,
 	.query_device = mana_ib_query_device,
 	.query_gid = mana_ib_query_gid,
 	.query_pkey = mana_ib_query_pkey,
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 7b079d8..6265c39 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -14,6 +14,7 @@ 
 #include <linux/dmapool.h>
 
 #include <net/mana/mana.h>
+#include "shadow_queue.h"
 
 #define PAGE_SZ_BM                                                             \
 	(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K |        \
@@ -165,6 +166,9 @@  struct mana_ib_qp {
 	/* The port on the IB device, starting with 1 */
 	u32 port;
 
+	struct shadow_queue shadow_rq;
+	struct shadow_queue shadow_sq;
+
 	refcount_t		refcount;
 	struct completion	free;
 };
@@ -404,6 +408,30 @@  struct mana_rnic_set_qp_state_resp {
 	struct gdma_resp_hdr hdr;
 }; /* HW Data */
 
+enum WQE_OPCODE_TYPES {
+	WQE_TYPE_UD_SEND = 0,
+	WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+	u32 wqe_type	: 5;
+	u32 fence	: 1;
+	u32 signaled	: 1;
+	u32 solicited	: 1;
+	u32 psn		: 24;
+
+	u32 ssn_or_rqpn	: 24;
+	u32 reserved1	: 8;
+	union {
+		struct {
+			u32 remote_qkey;
+			u32 immediate;
+			u32 reserved1;
+			u32 reserved2;
+		} ud_send;
+	};
+}; /* HW DATA */
+
 static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
 {
 	return mdev->gdma_dev->gdma_context;
@@ -562,4 +590,9 @@  int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
 int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 		      struct ib_udata *udata);
 int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr);
 #endif
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index fea45be..051ea03 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -562,10 +562,23 @@  static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
 	}
 	doorbell = gc->mana_ib.doorbell;
 
+	err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+				  sizeof(struct ud_rq_shadow_wqe));
+	if (err) {
+		ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
+		goto destroy_queues;
+	}
+	err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+				  sizeof(struct ud_sq_shadow_wqe));
+	if (err) {
+		ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
+		goto destroy_shadow_queues;
+	}
+
 	err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
 	if (err) {
 		ibdev_err(&mdev->ib_dev, "Failed to create ud qp  %d\n", err);
-		goto destroy_queues;
+		goto destroy_shadow_queues;
 	}
 	qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
 	qp->port = attr->port_num;
@@ -575,6 +588,9 @@  static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
 
 	return 0;
 
+destroy_shadow_queues:
+	destroy_shadow_queue(&qp->shadow_rq);
+	destroy_shadow_queue(&qp->shadow_sq);
 destroy_queues:
 	while (i-- > 0)
 		mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
@@ -754,6 +770,9 @@  static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
 		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
 	int i;
 
+	destroy_shadow_queue(&qp->shadow_rq);
+	destroy_shadow_queue(&qp->shadow_sq);
+
 	/* Ignore return code as there is not much we can do about it.
 	 * The error message is printed inside.
 	 */
diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h
new file mode 100644
index 0000000..d8bfb4c
--- /dev/null
+++ b/drivers/infiniband/hw/mana/shadow_queue.h
@@ -0,0 +1,115 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_SHADOW_QUEUE_H_
+#define _MANA_SHADOW_QUEUE_H_
+
+struct shadow_wqe_header {
+	u16 opcode;
+	u16 error_code;
+	u32 posted_wqe_size;
+	u64 wr_id;
+};
+
+struct ud_rq_shadow_wqe {
+	struct shadow_wqe_header header;
+	u32 byte_len;
+	u32 src_qpn;
+};
+
+struct ud_sq_shadow_wqe {
+	struct shadow_wqe_header header;
+};
+
+struct shadow_queue {
+	/* Unmasked producer index, Incremented on wqe posting */
+	u64 prod_idx;
+	/* Unmasked consumer index, Incremented on cq polling */
+	u64 cons_idx;
+	/* Unmasked index of next-to-complete (from HW) shadow WQE */
+	u64 next_to_complete_idx;
+	/* queue size in wqes */
+	u32 length;
+	/* distance between elements in bytes */
+	u32 stride;
+	/* ring buffer holding wqes */
+	void *buffer;
+};
+
+static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride)
+{
+	queue->buffer = kvmalloc(length * stride, GFP_KERNEL);
+	if (!queue->buffer)
+		return -ENOMEM;
+
+	queue->length = length;
+	queue->stride = stride;
+
+	return 0;
+}
+
+static inline void destroy_shadow_queue(struct shadow_queue *queue)
+{
+	kvfree(queue->buffer);
+}
+
+static inline bool shadow_queue_full(struct shadow_queue *queue)
+{
+	return (queue->prod_idx - queue->cons_idx) >= queue->length;
+}
+
+static inline bool shadow_queue_empty(struct shadow_queue *queue)
+{
+	return queue->prod_idx == queue->cons_idx;
+}
+
+static inline void *
+shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index)
+{
+	u32 index = unmasked_index % queue->length;
+
+	return ((u8 *)queue->buffer + index * queue->stride);
+}
+
+static inline void *
+shadow_queue_producer_entry(struct shadow_queue *queue)
+{
+	return shadow_queue_get_element(queue, queue->prod_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+{
+	if (queue->cons_idx == queue->next_to_complete_idx)
+		return NULL;
+
+	return shadow_queue_get_element(queue, queue->cons_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_complete(struct shadow_queue *queue)
+{
+	if (queue->next_to_complete_idx == queue->prod_idx)
+		return NULL;
+
+	return shadow_queue_get_element(queue, queue->next_to_complete_idx);
+}
+
+static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
+{
+	queue->prod_idx++;
+}
+
+static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
+{
+	queue->cons_idx++;
+}
+
+static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue)
+{
+	queue->next_to_complete_idx++;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c
new file mode 100644
index 0000000..1813567
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wr.c
@@ -0,0 +1,168 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define MAX_WR_SGL_NUM (2)
+
+static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr)
+{
+	struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+	struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+	struct gdma_posted_wqe_info wqe_info = {0};
+	struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
+	struct gdma_wqe_request wqe_req = {0};
+	struct ud_rq_shadow_wqe *shadow_wqe;
+	int err, i;
+
+	if (shadow_queue_full(&qp->shadow_rq))
+		return -EINVAL;
+
+	if (wr->num_sge > MAX_WR_SGL_NUM)
+		return -EINVAL;
+
+	for (i = 0; i < wr->num_sge; ++i) {
+		gdma_sgl[i].address = wr->sg_list[i].addr;
+		gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
+		gdma_sgl[i].size = wr->sg_list[i].length;
+	}
+	wqe_req.num_sge = wr->num_sge;
+	wqe_req.sgl = gdma_sgl;
+
+	err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+	if (err)
+		return err;
+
+	shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
+	memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+	shadow_wqe->header.opcode = IB_WC_RECV;
+	shadow_wqe->header.wr_id = wr->wr_id;
+	shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+	shadow_queue_advance_producer(&qp->shadow_rq);
+
+	mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+	return 0;
+}
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
+{
+	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+	int err = 0;
+
+	for (; wr; wr = wr->next) {
+		switch (ibqp->qp_type) {
+		case IB_QPT_UD:
+		case IB_QPT_GSI:
+			err = mana_ib_post_recv_ud(qp, wr);
+			if (unlikely(err)) {
+				*bad_wr = wr;
+				return err;
+			}
+			break;
+		default:
+			ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n",
+				  ibqp->qp_type);
+			return -EINVAL;
+		}
+	}
+
+	return err;
+}
+
+static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr)
+{
+	struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+	struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah);
+	struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port);
+	struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+	struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1];
+	struct gdma_posted_wqe_info wqe_info = {0};
+	struct gdma_wqe_request wqe_req = {0};
+	struct rdma_send_oob send_oob = {0};
+	struct ud_sq_shadow_wqe *shadow_wqe;
+	int err, i;
+
+	if (!ndev) {
+		ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+			  qp->port, qp->ibqp.qp_num);
+		return -EINVAL;
+	}
+
+	if (wr->wr.opcode != IB_WR_SEND)
+		return -EINVAL;
+
+	if (shadow_queue_full(&qp->shadow_sq))
+		return -EINVAL;
+
+	if (wr->wr.num_sge > MAX_WR_SGL_NUM)
+		return -EINVAL;
+
+	gdma_sgl[0].address = ah->dma_handle;
+	gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey;
+	gdma_sgl[0].size = sizeof(struct mana_ib_av);
+	for (i = 0; i < wr->wr.num_sge; ++i) {
+		gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr;
+		gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey;
+		gdma_sgl[i + 1].size = wr->wr.sg_list[i].length;
+	}
+
+	wqe_req.num_sge = wr->wr.num_sge + 1;
+	wqe_req.sgl = gdma_sgl;
+	wqe_req.inline_oob_size = sizeof(struct rdma_send_oob);
+	wqe_req.inline_oob_data = &send_oob;
+	wqe_req.flags = GDMA_WR_OOB_IN_SGL;
+	wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu));
+
+	send_oob.wqe_type = WQE_TYPE_UD_SEND;
+	send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE);
+	send_oob.signaled = !!(wr->wr.send_flags & IB_SEND_SIGNALED);
+	send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
+	send_oob.psn = qp->ud_qp.sq_psn;
+	send_oob.ssn_or_rqpn = wr->remote_qpn;
+	send_oob.ud_send.remote_qkey =
+		qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey;
+
+	err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+	if (err)
+		return err;
+
+	qp->ud_qp.sq_psn++;
+	shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq);
+	memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+	shadow_wqe->header.opcode = IB_WC_SEND;
+	shadow_wqe->header.wr_id = wr->wr.wr_id;
+	shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+	shadow_queue_advance_producer(&qp->shadow_sq);
+
+	mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+	return 0;
+}
+
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr)
+{
+	int err;
+	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+	for (; wr; wr = wr->next) {
+		switch (ibqp->qp_type) {
+		case IB_QPT_UD:
+		case IB_QPT_GSI:
+			err = mana_ib_post_send_ud(qp, ud_wr(wr));
+			if (unlikely(err)) {
+				*bad_wr = wr;
+				return err;
+			}
+			break;
+		default:
+			ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n",
+				  ibqp->qp_type);
+			return -EINVAL;
+		}
+	}
+
+	return err;
+}
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index a8a9cd7..409e4e8 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -331,6 +331,7 @@  void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue)
 	mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type,
 			      queue->id, queue->head * GDMA_WQE_BU_SIZE, 0);
 }
+EXPORT_SYMBOL_NS(mana_gd_wq_ring_doorbell, NET_MANA);
 
 void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit)
 {
@@ -1149,6 +1150,7 @@  int mana_gd_post_work_request(struct gdma_queue *wq,
 
 	return 0;
 }
+EXPORT_SYMBOL_NS(mana_gd_post_work_request, NET_MANA);
 
 int mana_gd_post_and_ring(struct gdma_queue *queue,
 			  const struct gdma_wqe_request *wqe_req,