diff mbox series

[for-next,4/4] RDMA/bnxt_re: Increase depth of control path command queue

Message ID 1544122186-7610-5-git-send-email-selvin.xavier@broadcom.com (mailing list archive)
State Superseded
Delegated to: Jason Gunthorpe
Headers show
Series RDMA/bnxt_re: driver update | expand

Commit Message

Selvin Xavier Dec. 6, 2018, 6:49 p.m. UTC
From: Devesh Sharma <devesh.sharma@broadcom.com>

Increase the depth of the control path command queue to 8K
entries to handle bursts of commands. This feature needs
support from FW, and the driver/FW compatibility is checked
using the interface version number.

Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/main.c       |  1 +
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 34 +++++++++++++++++---------
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 39 +++++++++++++++++++++---------
 3 files changed, 51 insertions(+), 23 deletions(-)

Comments

Jason Gunthorpe Dec. 7, 2018, 9:27 p.m. UTC | #1
On Thu, Dec 06, 2018 at 10:49:46AM -0800, Selvin Xavier wrote:
> From: Devesh Sharma <devesh.sharma@broadcom.com>
> 
> Increasing the depth of control path command queue to 8K
> entries to handle burst of commands. This feature needs
> support from FW and the driver/fw compatibility is checked
> from the interface version number.
> -#define BNXT_QPLIB_CMDQE_MAX_CNT	256
> +#define BNXT_QPLIB_CMDQE_MAX_CNT_256	256
> +#define BNXT_QPLIB_CMDQE_MAX_CNT_8192	8192
>  #define BNXT_QPLIB_CMDQE_UNITS		sizeof(struct bnxt_qplib_cmdqe)
> -#define BNXT_QPLIB_CMDQE_CNT_PER_PG	(PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS)
> +#define BNXT_QPLIB_CMDQE_BYTES(depth)	((depth) * BNXT_QPLIB_CMDQE_UNITS)
> +#define BNXT_QPLIB_CMDQE_NPAGES(depth)	((BNXT_QPLIB_CMDQE_BYTES(depth) %\
> +					  PAGE_SIZE) ?			\
> +					  ((BNXT_QPLIB_CMDQE_BYTES(depth) /\
> +					    PAGE_SIZE) + 1) :		\
> +					  (BNXT_QPLIB_CMDQE_BYTES(depth) /\
> +					   PAGE_SIZE))

Several of these look like they should be static inline functions to me; at
least this last one should be.

> +#define BNXT_QPLIB_CMDQE_PAGE_SIZE(depth) (BNXT_QPLIB_CMDQE_NPAGES(depth) * \
> +					   PAGE_SIZE)
> +
> +#define BNXT_QPLIB_CMDQE_CNT_PER_PG(depth) (BNXT_QPLIB_CMDQE_PAGE_SIZE(depth) /\
> +					    BNXT_QPLIB_CMDQE_UNITS)
> +
> +#define MAX_CMDQ_IDX(depth)		((depth) - 1)
> +#define MAX_CMDQ_IDX_PER_PG(depth)	(BNXT_QPLIB_CMDQE_CNT_PER_PG(depth) - 1)
>  
> -#define MAX_CMDQ_IDX			(BNXT_QPLIB_CMDQE_MAX_CNT - 1)
> -#define MAX_CMDQ_IDX_PER_PG		(BNXT_QPLIB_CMDQE_CNT_PER_PG - 1)
> -
> -#define RCFW_MAX_OUTSTANDING_CMD	BNXT_QPLIB_CMDQE_MAX_CNT
>  #define RCFW_MAX_COOKIE_VALUE		0x7FFF
>  #define RCFW_CMD_IS_BLOCKING		0x8000
>  #define RCFW_BLOCKED_CMD_WAIT_COUNT	0x4E20
>  
> +#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL
> +
>  /* Cmdq contains a fix number of a 16-Byte slots */
>  struct bnxt_qplib_cmdqe {
>  	u8		data[16];
>  };
>  
> -static inline u32 get_cmdq_pg(u32 val)
> +static inline u32 get_cmdq_pg(u32 val, u32 depth)
>  {
> -	return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG;
> +	return (val & ~(MAX_CMDQ_IDX_PER_PG(depth))) /
> +		(BNXT_QPLIB_CMDQE_CNT_PER_PG(depth));
>  }
>  
> -static inline u32 get_cmdq_idx(u32 val)
> +static inline u32 get_cmdq_idx(u32 val, u32 depth)
>  {
> -	return val & MAX_CMDQ_IDX_PER_PG;
> +	return val & (MAX_CMDQ_IDX_PER_PG(depth));
>  }

.. and if you are already going to have static inlines with trivial
content like this, then probably all of the above macros should be
functions.

Jason
Selvin Xavier Dec. 10, 2018, 5:48 a.m. UTC | #2
On Sat, Dec 8, 2018 at 2:57 AM Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Thu, Dec 06, 2018 at 10:49:46AM -0800, Selvin Xavier wrote:
> > From: Devesh Sharma <devesh.sharma@broadcom.com>
> >
> > Increasing the depth of control path command queue to 8K
> > entries to handle burst of commands. This feature needs
> > support from FW and the driver/fw compatibility is checked
> > from the interface version number.
> > -#define BNXT_QPLIB_CMDQE_MAX_CNT     256
> > +#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256
> > +#define BNXT_QPLIB_CMDQE_MAX_CNT_8192        8192
> >  #define BNXT_QPLIB_CMDQE_UNITS               sizeof(struct bnxt_qplib_cmdqe)
> > -#define BNXT_QPLIB_CMDQE_CNT_PER_PG  (PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS)
> > +#define BNXT_QPLIB_CMDQE_BYTES(depth)        ((depth) * BNXT_QPLIB_CMDQE_UNITS)
> > +#define BNXT_QPLIB_CMDQE_NPAGES(depth)       ((BNXT_QPLIB_CMDQE_BYTES(depth) %\
> > +                                       PAGE_SIZE) ?                  \
> > +                                       ((BNXT_QPLIB_CMDQE_BYTES(depth) /\
> > +                                         PAGE_SIZE) + 1) :           \
> > +                                       (BNXT_QPLIB_CMDQE_BYTES(depth) /\
> > +                                        PAGE_SIZE))
>
> Several of these look like they should be a static inline function to me, at
> least this last one should be.
Sure, will do.
>
> > +#define BNXT_QPLIB_CMDQE_PAGE_SIZE(depth) (BNXT_QPLIB_CMDQE_NPAGES(depth) * \
> > +                                        PAGE_SIZE)
> > +
> > +#define BNXT_QPLIB_CMDQE_CNT_PER_PG(depth) (BNXT_QPLIB_CMDQE_PAGE_SIZE(depth) /\
> > +                                         BNXT_QPLIB_CMDQE_UNITS)
> > +
> > +#define MAX_CMDQ_IDX(depth)          ((depth) - 1)
> > +#define MAX_CMDQ_IDX_PER_PG(depth)   (BNXT_QPLIB_CMDQE_CNT_PER_PG(depth) - 1)
> >
> > -#define MAX_CMDQ_IDX                 (BNXT_QPLIB_CMDQE_MAX_CNT - 1)
> > -#define MAX_CMDQ_IDX_PER_PG          (BNXT_QPLIB_CMDQE_CNT_PER_PG - 1)
> > -
> > -#define RCFW_MAX_OUTSTANDING_CMD     BNXT_QPLIB_CMDQE_MAX_CNT
> >  #define RCFW_MAX_COOKIE_VALUE                0x7FFF
> >  #define RCFW_CMD_IS_BLOCKING         0x8000
> >  #define RCFW_BLOCKED_CMD_WAIT_COUNT  0x4E20
> >
> > +#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL
> > +
> >  /* Cmdq contains a fix number of a 16-Byte slots */
> >  struct bnxt_qplib_cmdqe {
> >       u8              data[16];
> >  };
> >
> > -static inline u32 get_cmdq_pg(u32 val)
> > +static inline u32 get_cmdq_pg(u32 val, u32 depth)
> >  {
> > -     return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG;
> > +     return (val & ~(MAX_CMDQ_IDX_PER_PG(depth))) /
> > +             (BNXT_QPLIB_CMDQE_CNT_PER_PG(depth));
> >  }
> >
> > -static inline u32 get_cmdq_idx(u32 val)
> > +static inline u32 get_cmdq_idx(u32 val, u32 depth)
> >  {
> > -     return val & MAX_CMDQ_IDX_PER_PG;
> > +     return val & (MAX_CMDQ_IDX_PER_PG(depth));
> >  }
>
> .. and if you are already going to have static inlines with trivial
> content this, then probably all of the above macros should be
> functions.
>
> Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 1c75c07..088d012 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1344,6 +1344,7 @@  static int bnxt_re_dev_init(struct bnxt_re_dev *rdev)
 	 * memory for the function and all child VFs
 	 */
 	rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
+					   &rdev->qplib_ctx,
 					   BNXT_RE_MAX_QPC_COUNT);
 	if (rc) {
 		pr_err("Failed to allocate RCFW Channel: %#x\n", rc);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index be4e33e..41c5220 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -58,7 +58,7 @@  static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 	u16 cbit;
 	int rc;
 
-	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+	cbit = cookie % rcfw->cmdq_depth;
 	rc = wait_event_timeout(rcfw->waitq,
 				!test_bit(cbit, rcfw->cmdq_bitmap),
 				msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
@@ -70,7 +70,7 @@  static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
 	u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
 	u16 cbit;
 
-	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+	cbit = cookie % rcfw->cmdq_depth;
 	if (!test_bit(cbit, rcfw->cmdq_bitmap))
 		goto done;
 	do {
@@ -86,6 +86,7 @@  static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
 {
 	struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
 	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+	u32 cmdq_depth = rcfw->cmdq_depth;
 	struct bnxt_qplib_crsq *crsqe;
 	u32 sw_prod, cmdq_prod;
 	unsigned long flags;
@@ -124,7 +125,7 @@  static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
 
 
 	cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
-	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+	cbit = cookie % rcfw->cmdq_depth;
 	if (is_block)
 		cookie |= RCFW_CMD_IS_BLOCKING;
 
@@ -153,7 +154,8 @@  static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
 	do {
 		/* Locate the next cmdq slot */
 		sw_prod = HWQ_CMP(cmdq->prod, cmdq);
-		cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
+		cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)]
+				[get_cmdq_idx(sw_prod, cmdq_depth)];
 		if (!cmdqe) {
 			dev_err(&rcfw->pdev->dev,
 				"RCFW request failed with no cmdqe!\n");
@@ -326,7 +328,7 @@  static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 		mcookie = qp_event->cookie;
 		blocked = cookie & RCFW_CMD_IS_BLOCKING;
 		cookie &= RCFW_MAX_COOKIE_VALUE;
-		cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+		cbit = cookie % rcfw->cmdq_depth;
 		crsqe = &rcfw->crsqe_tbl[cbit];
 		if (crsqe->resp &&
 		    crsqe->resp->cookie  == mcookie) {
@@ -555,6 +557,7 @@  void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
 
 int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
 				  struct bnxt_qplib_rcfw *rcfw,
+				  struct bnxt_qplib_ctx *ctx,
 				  int qp_tbl_sz)
 {
 	rcfw->pdev = pdev;
@@ -567,11 +570,18 @@  int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
 			"HW channel CREQ allocation failed\n");
 		goto fail;
 	}
-	rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
-	if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0,
-				      &rcfw->cmdq.max_elements,
-				      BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
-				      HWQ_TYPE_CTX)) {
+	if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK)
+		rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256;
+	else
+		rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192;
+
+	rcfw->cmdq.max_elements = rcfw->cmdq_depth;
+	if (bnxt_qplib_alloc_init_hwq
+			(rcfw->pdev, &rcfw->cmdq, NULL, 0,
+			 &rcfw->cmdq.max_elements,
+			 BNXT_QPLIB_CMDQE_UNITS, 0,
+			 BNXT_QPLIB_CMDQE_PAGE_SIZE(rcfw->cmdq_depth),
+			 HWQ_TYPE_CTX)) {
 		dev_err(&rcfw->pdev->dev,
 			"HW channel CMDQ allocation failed\n");
 		goto fail;
@@ -674,7 +684,7 @@  int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 	/* General */
 	rcfw->seq_num = 0;
 	set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags);
-	bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
+	bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth *
 				  sizeof(unsigned long));
 	rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
 	if (!rcfw->cmdq_bitmap)
@@ -734,7 +744,7 @@  int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
 
 	init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]);
 	init.cmdq_size_cmdq_lvl = cpu_to_le16(
-		((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) &
+		((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) &
 		 CMDQ_INIT_CMDQ_SIZE_MASK) |
 		((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) &
 		 CMDQ_INIT_CMDQ_LVL_MASK));
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 9a8687dc..7dd5747 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -64,31 +64,45 @@ 
 #define RCFW_CMD_WAIT_TIME_MS		20000 /* 20 Seconds timeout */
 
 /* CMDQ elements */
-#define BNXT_QPLIB_CMDQE_MAX_CNT	256
+#define BNXT_QPLIB_CMDQE_MAX_CNT_256	256
+#define BNXT_QPLIB_CMDQE_MAX_CNT_8192	8192
 #define BNXT_QPLIB_CMDQE_UNITS		sizeof(struct bnxt_qplib_cmdqe)
-#define BNXT_QPLIB_CMDQE_CNT_PER_PG	(PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS)
+#define BNXT_QPLIB_CMDQE_BYTES(depth)	((depth) * BNXT_QPLIB_CMDQE_UNITS)
+#define BNXT_QPLIB_CMDQE_NPAGES(depth)	((BNXT_QPLIB_CMDQE_BYTES(depth) %\
+					  PAGE_SIZE) ?			\
+					  ((BNXT_QPLIB_CMDQE_BYTES(depth) /\
+					    PAGE_SIZE) + 1) :		\
+					  (BNXT_QPLIB_CMDQE_BYTES(depth) /\
+					   PAGE_SIZE))
+#define BNXT_QPLIB_CMDQE_PAGE_SIZE(depth) (BNXT_QPLIB_CMDQE_NPAGES(depth) * \
+					   PAGE_SIZE)
+
+#define BNXT_QPLIB_CMDQE_CNT_PER_PG(depth) (BNXT_QPLIB_CMDQE_PAGE_SIZE(depth) /\
+					    BNXT_QPLIB_CMDQE_UNITS)
+
+#define MAX_CMDQ_IDX(depth)		((depth) - 1)
+#define MAX_CMDQ_IDX_PER_PG(depth)	(BNXT_QPLIB_CMDQE_CNT_PER_PG(depth) - 1)
 
-#define MAX_CMDQ_IDX			(BNXT_QPLIB_CMDQE_MAX_CNT - 1)
-#define MAX_CMDQ_IDX_PER_PG		(BNXT_QPLIB_CMDQE_CNT_PER_PG - 1)
-
-#define RCFW_MAX_OUTSTANDING_CMD	BNXT_QPLIB_CMDQE_MAX_CNT
 #define RCFW_MAX_COOKIE_VALUE		0x7FFF
 #define RCFW_CMD_IS_BLOCKING		0x8000
 #define RCFW_BLOCKED_CMD_WAIT_COUNT	0x4E20
 
+#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL
+
 /* Cmdq contains a fix number of a 16-Byte slots */
 struct bnxt_qplib_cmdqe {
 	u8		data[16];
 };
 
-static inline u32 get_cmdq_pg(u32 val)
+static inline u32 get_cmdq_pg(u32 val, u32 depth)
 {
-	return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG;
+	return (val & ~(MAX_CMDQ_IDX_PER_PG(depth))) /
+		(BNXT_QPLIB_CMDQE_CNT_PER_PG(depth));
 }
 
-static inline u32 get_cmdq_idx(u32 val)
+static inline u32 get_cmdq_idx(u32 val, u32 depth)
 {
-	return val & MAX_CMDQ_IDX_PER_PG;
+	return val & (MAX_CMDQ_IDX_PER_PG(depth));
 }
 
 /* Crsq buf is 1024-Byte */
@@ -194,11 +208,14 @@  struct bnxt_qplib_rcfw {
 	struct bnxt_qplib_qp_node *qp_tbl;
 	u64 oos_prev;
 	u32 init_oos_stats;
+	u32 cmdq_depth;
 };
 
 void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
 int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
-				  struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz);
+				  struct bnxt_qplib_rcfw *rcfw,
+				  struct bnxt_qplib_ctx *ctx,
+				  int qp_tbl_sz);
 void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
 void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
 int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,