[for-next,6/6] RDMA/bnxt_re: Enable low latency push

Message ID 1681125115-7127-7-git-send-email-selvin.xavier@broadcom.com (mailing list archive)
State Superseded
Series RDMA/bnxt_re: driver update for supporting low latency push

Commit Message

Selvin Xavier April 10, 2023, 11:11 a.m. UTC
Enable low latency push in Gen P5 adapters for small
packets. This is supported only for user space QPs.
Introduce a new mmap flag for write-combine buffers and
allocate separate write-combine pages from the BAR when
low latency push mode is enabled in HW.
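
For context, a minimal sketch of how a userspace provider could consume
the new alloc_pd response fields (comp_mask/wcdpi/wcdbr come from the
uapi change in this patch; the provider-side struct and helper name are
illustrative only, not an existing rdma-core API):

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <rdma/bnxt_re-abi.h>

/* Illustrative provider-side PD state */
struct example_pd {
	uint32_t wcdpi;		/* push DPI index returned by the kernel */
	void *wcdb_page;	/* mapped write-combine doorbell page */
};

static int map_push_doorbell(int cmd_fd, size_t page_size,
			     const struct bnxt_re_pd_resp *resp,
			     struct example_pd *pd)
{
	/* Only attempt the mapping when the kernel reported a WC DPI */
	if (!(resp->comp_mask & BNXT_RE_COMP_MASK_PD_HAS_WC_DPI))
		return 0;

	/* resp->wcdbr carries the mmap offset cookie for BNXT_RE_MMAP_WC_DB */
	pd->wcdb_page = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
			     cmd_fd, (off_t)resp->wcdbr);
	if (pd->wcdb_page == MAP_FAILED)
		return -1;

	pd->wcdpi = resp->wcdpi;
	return 0;
}

The kernel side of this mapping is the BNXT_RE_MMAP_WC_DB case added to
bnxt_re_mmap() below, which maps the page with pgprot_writecombine().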

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c  | 43 +++++++++++++++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/ib_verbs.h  |  4 ++-
 drivers/infiniband/hw/bnxt_re/main.c      | 10 +++++++
 drivers/infiniband/hw/bnxt_re/qplib_res.c |  3 +++
 drivers/infiniband/hw/bnxt_re/qplib_res.h |  3 ++-
 include/uapi/rdma/bnxt_re-abi.h           |  9 +++++++
 6 files changed, 70 insertions(+), 2 deletions(-)

Comments

Jason Gunthorpe April 11, 2023, 4:56 p.m. UTC | #1
On Mon, Apr 10, 2023 at 04:11:55AM -0700, Selvin Xavier wrote:
> Enabling the low latency push in Gen P5 adapters for small
> packets. This is supported only for the user space QPs.
> Introduce new mmap flag for write combine buffers.
> Allocate separate Write Combine pages from BAR when Low
> latency push mode is enabled in HW.

No new mmap flags.

This looks like it is basically the same as mlx5, I expect you'll run
into all the same issues and needs as mlx5 did.

Follow the modern mlx5 design, add a new API to allocate one of these
pages and have it return back the mmap cookie to use. Let userspace
allocate as many as it needs.

Jason
Selvin Xavier April 12, 2023, 5:58 a.m. UTC | #2
On Tue, Apr 11, 2023 at 10:26 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Mon, Apr 10, 2023 at 04:11:55AM -0700, Selvin Xavier wrote:
> > Enabling the low latency push in Gen P5 adapters for small
> > packets. This is supported only for the user space QPs.
> > Introduce new mmap flag for write combine buffers.
> > Allocate separate Write Combine pages from BAR when Low
> > latency push mode is enabled in HW.
>
> No new mmap flags.
>
> This looks like it is basically the same as mlx5, I expect you'll run
> into all the same issues and needs as mlx5 did.
>
> Follow the modern mlx5 design, add a new API to allocate one of these
> pages and have it return back the mmap cookie to use. Let userspace
> allocate as many as it needs.
Thanks Jason for the pointers. Will review this and rework the patch.
>
> Jason
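
A rough sketch of the direction suggested above, loosely modeled on how
mlx5 hands out doorbell/UAR pages on demand: a driver-specific method
allocates one write-combine page per call and returns its mmap cookie.
The function below is hypothetical (the uverbs method definition, object
lifetime and teardown are omitted); the qplib and mmap-entry helpers it
calls are the ones introduced or used by this series.

static int bnxt_re_alloc_push_page(struct bnxt_re_ucontext *uctx,
				   u64 *mmap_offset, u32 *dpi)
{
	struct bnxt_re_dev *rdev = uctx->rdev;
	struct rdma_user_mmap_entry *entry;
	struct bnxt_qplib_dpi *wcdpi;
	int rc;

	wcdpi = kzalloc(sizeof(*wcdpi), GFP_KERNEL);
	if (!wcdpi)
		return -ENOMEM;

	/* Carve one write-combine page out of the push BAR region */
	rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res, wcdpi, uctx,
				  BNXT_QPLIB_DPI_TYPE_WC);
	if (rc)
		goto free_wcdpi;

	/* Hand the page to the generic mmap machinery; the cookie goes
	 * back to userspace, which mmaps the ucontext fd at that offset.
	 */
	entry = bnxt_re_mmap_entry_insert(uctx, (u64)wcdpi->umdbr,
					  BNXT_RE_MMAP_WC_DB, mmap_offset);
	if (!entry) {
		rc = -ENOMEM;
		goto dealloc_wcdpi;
	}

	/* wcdpi ownership would be transferred to the uverbs object here */
	*dpi = wcdpi->dpi;
	return 0;

dealloc_wcdpi:
	bnxt_qplib_dealloc_dpi(&rdev->qplib_res, wcdpi);
free_wcdpi:
	kfree(wcdpi);
	return rc;
}

Userspace can then call such a method as many times as it needs push
pages and mmap each returned offset, instead of receiving exactly one
WC DPI tied to the first PD allocation as in the patch below.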

Patch

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index e2468b8..591ee82 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -566,6 +566,9 @@  int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
 
 	if (udata) {
 		rdma_user_mmap_entry_remove(pd->pd_db_mmap);
+		if (pd->pd_wcdb_mmap)
+			rdma_user_mmap_entry_remove(pd->pd_wcdb_mmap);
+		pd->pd_wcdb_mmap = NULL;
 		pd->pd_db_mmap = NULL;
 	}
 
@@ -597,6 +600,7 @@  int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	}
 
 	if (udata) {
+		struct bnxt_qplib_chip_ctx *cctx = rdev->chip_ctx;
 		struct bnxt_re_pd_resp resp = {};
 
 		if (!ucntx->dpi.dbr) {
@@ -606,9 +610,23 @@  int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 			 */
 			if (bnxt_qplib_alloc_dpi(&rdev->qplib_res,
 						 &ucntx->dpi, ucntx, BNXT_QPLIB_DPI_TYPE_UC)) {
+				dev_err(rdev_to_dev(rdev), "DP alloc failed");
 				rc = -ENOMEM;
 				goto dbfail;
 			}
+			if (cctx->modes.db_push) {
+				rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res, &ucntx->wcdpi,
+							  ucntx, BNXT_QPLIB_DPI_TYPE_WC);
+				if (rc) {
+					dev_err(rdev_to_dev(rdev), "push dp alloc failed");
+					bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &ucntx->dpi);
+					ucntx->dpi.dbr = NULL;
+					rc = -ENOMEM;
+					goto dbfail;
+				}
+				resp.wcdpi = ucntx->wcdpi.dpi;
+				resp.comp_mask = BNXT_RE_COMP_MASK_PD_HAS_WC_DPI;
+			}
 		}
 
 		resp.pdid = pd->qplib_pd.id;
@@ -625,6 +643,16 @@  int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 			goto dbfail;
 		}
 
+		pd->pd_wcdb_mmap = bnxt_re_mmap_entry_insert(ucntx, (u64)ucntx->wcdpi.umdbr,
+							     BNXT_RE_MMAP_WC_DB, &resp.wcdbr);
+
+		if (!pd->pd_wcdb_mmap) {
+			ibdev_err(&rdev->ibdev,
+				  "Failed to insert mmap entry\n");
+			rc = -ENOMEM;
+			goto dbfail;
+		}
+
 		rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
 		if (rc) {
 			ibdev_err(&rdev->ibdev,
@@ -4035,6 +4063,9 @@  int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
 	resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_MODE;
 	resp.mode = rdev->chip_ctx->modes.wqe_mode;
 
+	if (rdev->chip_ctx->modes.db_push)
+		resp.comp_mask |= BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED;
+
 	uctx->shpage_mmap = bnxt_re_mmap_entry_insert(uctx, 0, BNXT_RE_MMAP_SH_PAGE, NULL);
 	if (!uctx->shpage_mmap) {
 		ibdev_err(ibdev, "Failed to create mmap entry for shared page\n");
@@ -4074,6 +4105,10 @@  void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
 		/* Free DPI only if this is the first PD allocated by the
 		 * application and mark the context dpi as NULL
 		 */
+		if (uctx->wcdpi.dbr) {
+			bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->wcdpi);
+			uctx->wcdpi.dbr = NULL;
+		}
 		bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->dpi);
 		uctx->dpi.dbr = NULL;
 	}
@@ -4103,6 +4138,14 @@  int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
 				  rdma_entry);
 
 	switch (bnxt_entry->mmap_flag) {
+	case BNXT_RE_MMAP_WC_DB:
+		pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+		ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+					pgprot_writecombine(vma->vm_page_prot),
+					rdma_entry);
+		if (ret)
+			ibdev_err(&rdev->ibdev, "Failed to map WC DB");
+		break;
 	case BNXT_RE_MMAP_UC_DB:
 		pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
 		ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index dcd31ae..faf1481 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -61,6 +61,7 @@  struct bnxt_re_pd {
 	struct bnxt_qplib_pd	qplib_pd;
 	struct bnxt_re_fence_data fence;
 	struct rdma_user_mmap_entry *pd_db_mmap;
+	struct rdma_user_mmap_entry *pd_wcdb_mmap;
 };
 
 struct bnxt_re_ah {
@@ -135,6 +136,7 @@  struct bnxt_re_ucontext {
 	struct ib_ucontext      ib_uctx;
 	struct bnxt_re_dev	*rdev;
 	struct bnxt_qplib_dpi	dpi;
+	struct bnxt_qplib_dpi   wcdpi;
 	void			*shpg;
 	spinlock_t		sh_lock;	/* protect shpg */
 	struct rdma_user_mmap_entry *shpage_mmap;
@@ -143,6 +145,7 @@  struct bnxt_re_ucontext {
 enum bnxt_re_mmap_flag {
 	BNXT_RE_MMAP_SH_PAGE,
 	BNXT_RE_MMAP_UC_DB,
+	BNXT_RE_MMAP_WC_DB,
 };
 
 struct bnxt_re_user_mmap_entry {
@@ -228,7 +231,6 @@  void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
 int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
 void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
 
-
 unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
 void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
 #endif /* __BNXT_RE_IB_VERBS_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index cfd3708..824dc3b 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -117,6 +117,11 @@  static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
 	 * in such cases and DB-push will be disabled.
 	 */
 	barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION);
+	if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) {
+		res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
+		dev_info(rdev_to_dev(rdev),
+			 "Low latency framework is enabled\n");
+	}
 }
 
 static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
@@ -430,6 +435,11 @@  int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
 			"Failed to query capabilities, rc = %#x", rc);
 		return rc;
 	}
+	cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE;
+
+	if (cctx->modes.db_push)
+		ibdev_dbg(&rdev->ibdev, "DB push enabled");
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index bb3087d..5483778 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -741,6 +741,9 @@  int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res,
 		dpi->dbr = dpit->priv_db;
 		dpi->dpi = dpi->bit;
 		break;
+	case BNXT_QPLIB_DPI_TYPE_WC:
+		dpi->dbr = ioremap_wc(umaddr, PAGE_SIZE);
+		break;
 	default:
 		dpi->dbr = ioremap(umaddr, PAGE_SIZE);
 		break;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 95b1d6c..dc39f67 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -47,7 +47,7 @@  extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
 
 struct bnxt_qplib_drv_modes {
 	u8	wqe_mode;
-	/* Other modes to follow here */
+	bool db_push;
 };
 
 struct bnxt_qplib_chip_ctx {
@@ -193,6 +193,7 @@  struct bnxt_qplib_sgid_tbl {
 enum {
 	BNXT_QPLIB_DPI_TYPE_KERNEL      = 0,
 	BNXT_QPLIB_DPI_TYPE_UC          = 1,
+	BNXT_QPLIB_DPI_TYPE_WC          = 2
 };
 
 struct bnxt_qplib_dpi {
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index c4e9077..719f1fe 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -51,6 +51,7 @@ 
 enum {
 	BNXT_RE_UCNTX_CMASK_HAVE_CCTX = 0x1ULL,
 	BNXT_RE_UCNTX_CMASK_HAVE_MODE = 0x02ULL,
+	BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL,
 };
 
 enum bnxt_re_wqe_mode {
@@ -78,10 +79,18 @@  struct bnxt_re_uctx_resp {
  * not 8 byted aligned. To avoid undesired padding in various cases we have to
  * set this struct to packed.
  */
+enum {
+	BNXT_RE_COMP_MASK_PD_HAS_WC_DPI = 0x01,
+};
+
 struct bnxt_re_pd_resp {
 	__u32 pdid;
 	__u32 dpi;
 	__u64 dbr;
+	__u64 comp_mask;
+	__u32 rsvd;
+	__u32 wcdpi;
+	__u64 wcdbr;
 } __attribute__((packed, aligned(4)));
 
 struct bnxt_re_cq_req {