diff mbox series

[v2,for-next,15/17] RDMA/bnxt_re: use firmware provided max request timeout

Message ID 1686308514-11996-16-git-send-email-selvin.xavier@broadcom.com (mailing list archive)
State Accepted
Headers show
Series RDMA/bnxt_re: Control path updates | expand

Commit Message

Selvin Xavier June 9, 2023, 11:01 a.m. UTC
From: Kashyap Desai <kashyap.desai@broadcom.com>

Firmware provides the max request timeout value as part of the
hwrm_ver_get API. The driver gets the timeout from firmware and, if
that interface is not available, falls back to a hardcoded timeout value.
Also, add a helper function to check the FW status.

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/main.c       |  8 ++++
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 59 ++++++++++++++++++++++++------
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.h |  4 +-
 drivers/infiniband/hw/bnxt_re/qplib_res.h  |  1 +
 4 files changed, 60 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 8241154..a2c7d3f 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1041,6 +1041,7 @@  static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
 	struct bnxt_en_dev *en_dev = rdev->en_dev;
 	struct hwrm_ver_get_output resp = {0};
 	struct hwrm_ver_get_input req = {0};
+	struct bnxt_qplib_chip_ctx *cctx;
 	struct bnxt_fw_msg fw_msg;
 	int rc = 0;
 
@@ -1058,11 +1059,18 @@  static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
 			  rc);
 		return;
 	}
+
+	cctx = rdev->chip_ctx;
 	rdev->qplib_ctx.hwrm_intf_ver =
 		(u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
 		(u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
 		(u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
 		le16_to_cpu(resp.hwrm_intf_patch);
+
+	cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
+
+	if (!cctx->hwrm_cmd_max_timeout)
+		cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
 }
 
 static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 8b1b413..99aa1ae 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -90,6 +90,41 @@  static int bnxt_qplib_map_rc(u8 opcode)
 }
 
 /**
+ * bnxt_re_is_fw_stalled   -	Check firmware health
+ * @rcfw      -   rcfw channel instance of rdev
+ * @cookie    -   cookie to track the command
+ * @opcode    -   opcode of the submitted rcfw command
+ * @cbit      -   bitmap entry of cookie
+ *
+ * If firmware has not responded to any rcfw command within
+ * rcfw->max_timeout seconds, consider firmware as stalled.
+ *
+ * Returns:
+ * 0 if firmware is responding
+ * -ENODEV if firmware is not responding
+ */
+static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw,
+				 u16 cookie, u8 opcode, u16 cbit)
+{
+	struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+
+	/* max_timeout is in seconds; last_seen is a jiffies timestamp */
+	if (time_after(jiffies, cmdq->last_seen +
+		      (rcfw->max_timeout * HZ))) {
+		/* '\n' terminates the record; %u matches jiffies_to_msecs() */
+		dev_warn_ratelimited(&rcfw->pdev->dev,
+				     "%s: FW STALL Detected. cmdq[%#x]=%#x waited (%u > %d) msec active %d\n",
+				     __func__, cookie, opcode,
+				     jiffies_to_msecs(jiffies - cmdq->last_seen),
+				     rcfw->max_timeout * 1000,
+				     test_bit(cbit, cmdq->cmdq_bitmap));
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/**
  * __wait_for_resp   -	Don't hold the cpu context and wait for response
  * @rcfw      -   rcfw channel instance of rdev
  * @cookie    -   cookie to track the command
@@ -105,6 +140,7 @@  static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
 {
 	struct bnxt_qplib_cmdq_ctx *cmdq;
 	u16 cbit;
+	int ret;
 
 	cmdq = &rcfw->cmdq;
 	cbit = cookie % rcfw->cmdq_depth;
@@ -118,8 +154,8 @@  static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
 		wait_event_timeout(cmdq->waitq,
 				   !test_bit(cbit, cmdq->cmdq_bitmap) ||
 				   test_bit(ERR_DEVICE_DETACHED, &cmdq->flags),
-				   msecs_to_jiffies(RCFW_FW_STALL_TIMEOUT_SEC
-						    * 1000));
+				   msecs_to_jiffies(rcfw->max_timeout * 1000));
+
 		if (!test_bit(cbit, cmdq->cmdq_bitmap))
 			return 0;
 
@@ -128,10 +164,9 @@  static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
 		if (!test_bit(cbit, cmdq->cmdq_bitmap))
 			return 0;
 
-		/* Firmware stall is detected */
-		if (time_after(jiffies, cmdq->last_seen +
-			      (RCFW_FW_STALL_TIMEOUT_SEC * HZ)))
-			return -ENODEV;
+		ret = bnxt_re_is_fw_stalled(rcfw, cookie, opcode, cbit);
+		if (ret)
+			return ret;
 
 	} while (true);
 };
@@ -352,6 +387,7 @@  static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
 	struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
 	unsigned long issue_time;
 	u16 cbit;
+	int ret;
 
 	cbit = cookie % rcfw->cmdq_depth;
 	issue_time = jiffies;
@@ -368,11 +404,10 @@  static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
 		if (!test_bit(cbit, cmdq->cmdq_bitmap))
 			return 0;
 		if (jiffies_to_msecs(jiffies - issue_time) >
-		    (RCFW_FW_STALL_TIMEOUT_SEC * 1000)) {
-			/* Firmware stall is detected */
-			if (time_after(jiffies, cmdq->last_seen +
-				      (RCFW_FW_STALL_TIMEOUT_SEC * HZ)))
-				return -ENODEV;
+		    (rcfw->max_timeout * 1000)) {
+			ret = bnxt_re_is_fw_stalled(rcfw, cookie, opcode, cbit);
+			if (ret)
+				return ret;
 		}
 	} while (true);
 };
@@ -951,6 +986,8 @@  int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
 	if (!rcfw->qp_tbl)
 		goto fail;
 
+	rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
+
 	return 0;
 
 fail:
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 338bf6a..b644dcc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -51,7 +51,7 @@ 
 
 #define RCFW_DBR_PCI_BAR_REGION		2
 #define RCFW_DBR_BASE_PAGE_SHIFT	12
-#define RCFW_FW_STALL_TIMEOUT_SEC	40
+#define RCFW_FW_STALL_MAX_TIMEOUT	40
 
 /* Cmdq contains a fix number of a 16-Byte slots */
 struct bnxt_qplib_cmdqe {
@@ -227,6 +227,8 @@  struct bnxt_qplib_rcfw {
 	atomic_t rcfw_intr_enabled;
 	struct semaphore rcfw_inflight;
 	atomic_t timeout_send;
+	/* cached from chip cctx for quick reference in slow path */
+	u16 max_timeout;
 };
 
 struct bnxt_qplib_cmdqmsg {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 982e2c9..77f0b84 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -55,6 +55,7 @@  struct bnxt_qplib_chip_ctx {
 	u8	chip_rev;
 	u8	chip_metal;
 	u16	hw_stats_size;
+	u16	hwrm_cmd_max_timeout;
 	struct bnxt_qplib_drv_modes modes;
 };