diff mbox series

[for-next,10/17] RDMA/bnxt_re: handle command completions after driver detect a timedout

Message ID 1686219908-11181-11-git-send-email-selvin.xavier@broadcom.com (mailing list archive)
State Superseded
Headers show
Series RDMA/bnxt_re: Control path updates | expand

Commit Message

Selvin Xavier June 8, 2023, 10:25 a.m. UTC
From: Kashyap Desai <kashyap.desai@broadcom.com>

If calling context detect command timeout, associated memory stored on
stack will not be valid. If firmware complete the same command later,
this causes incorrect memory access by driver.

Added is_waiter_alive to handle delayed completion by firmware.
is_waiter_alive is set and reset under command queue lock.

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 46 +++++++++++++++++++-----------
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.h |  1 +
 2 files changed, 30 insertions(+), 17 deletions(-)

Comments

kernel test robot June 8, 2023, 12:53 p.m. UTC | #1
Hi Selvin,

kernel test robot noticed the following build warnings:

[auto build test WARNING on rdma/for-next]
[also build test WARNING on next-20230608]
[cannot apply to linus/master v6.4-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Selvin-Xavier/RDMA-bnxt_re-wraparound-mbox-producer-index/20230608-184033
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link:    https://lore.kernel.org/r/1686219908-11181-11-git-send-email-selvin.xavier%40broadcom.com
patch subject: [PATCH for-next 10/17] RDMA/bnxt_re: handle command completions after driver detect a timedout
config: riscv-allyesconfig (https://download.01.org/0day-ci/archive/20230608/202306082023.3MJIBDhd-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 12.3.0
reproduce (this is a W=1 build):
        mkdir -p ~/bin
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        git remote add rdma https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git
        git fetch rdma for-next
        git checkout rdma/for-next
        b4 shazam https://lore.kernel.org/r/1686219908-11181-11-git-send-email-selvin.xavier@broadcom.com
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross W=1 O=build_dir ARCH=riscv olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash drivers/infiniband/hw/bnxt_re/

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306082023.3MJIBDhd-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/infiniband/hw/bnxt_re/qplib_rcfw.c: In function '__wait_for_resp':
   drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:108:13: warning: variable 'ret' set but not used [-Wunused-but-set-variable]
     108 |         int ret;
         |             ^~~
   drivers/infiniband/hw/bnxt_re/qplib_rcfw.c: In function '__send_message':
   drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:173:20: warning: variable 'opcode' set but not used [-Wunused-but-set-variable]
     173 |         u32 bsize, opcode, free_slots, required_slots;
         |                    ^~~~~~
   drivers/infiniband/hw/bnxt_re/qplib_rcfw.c: In function 'bnxt_qplib_process_qp_event':
>> drivers/infiniband/hw/bnxt_re/qplib_rcfw.c:502:17: warning: variable 'mcookie' set but not used [-Wunused-but-set-variable]
     502 |         __le16  mcookie;
         |                 ^~~~~~~

Kconfig warnings: (for reference only)
   WARNING: unmet direct dependencies detected for IOMMU_IO_PGTABLE_LPAE
   Depends on [n]: IOMMU_SUPPORT [=y] && (ARM || ARM64 || COMPILE_TEST [=n]) && !GENERIC_ATOMIC64 [=n]
   Selected by [y]:
   - IPMMU_VMSA [=y] && IOMMU_SUPPORT [=y] && (ARCH_RENESAS [=y] || COMPILE_TEST [=n]) && !GENERIC_ATOMIC64 [=n]


vim +/mcookie +502 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c

1ac5a404797523 Selvin Xavier         2017-02-10  487  
1ac5a404797523 Selvin Xavier         2017-02-10  488  static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
1020ce59adaac6 Kashyap Desai         2023-06-08  489  				       struct creq_qp_event *qp_event,
1020ce59adaac6 Kashyap Desai         2023-06-08  490  				       u32 *num_wait)
1ac5a404797523 Selvin Xavier         2017-02-10  491  {
f218d67ef00431 Selvin Xavier         2017-06-29  492  	struct creq_qp_error_notification *err_event;
cee0c7bba48691 Devesh Sharma         2020-02-15  493  	struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
cee0c7bba48691 Devesh Sharma         2020-02-15  494  	struct bnxt_qplib_crsqe *crsqe;
9a38dbbd424de0 Kashyap Desai         2023-06-08  495  	u32 qp_id, tbl_indx, req_size;
f218d67ef00431 Selvin Xavier         2017-06-29  496  	struct bnxt_qplib_qp *qp;
cc1ec769b87c7d Devesh Sharma         2017-05-22  497  	u16 cbit, blocked = 0;
9a38dbbd424de0 Kashyap Desai         2023-06-08  498  	bool is_waiter_alive;
cee0c7bba48691 Devesh Sharma         2020-02-15  499  	struct pci_dev *pdev;
cee0c7bba48691 Devesh Sharma         2020-02-15  500  	unsigned long flags;
1020ce59adaac6 Kashyap Desai         2023-06-08  501  	u32 wait_cmds = 0;
cc1ec769b87c7d Devesh Sharma         2017-05-22 @502  	__le16  mcookie;
cee0c7bba48691 Devesh Sharma         2020-02-15  503  	u16 cookie;
cee0c7bba48691 Devesh Sharma         2020-02-15  504  	int rc = 0;
1ac5a404797523 Selvin Xavier         2017-02-10  505  
cee0c7bba48691 Devesh Sharma         2020-02-15  506  	pdev = rcfw->pdev;
1ac5a404797523 Selvin Xavier         2017-02-10  507  	switch (qp_event->event) {
1ac5a404797523 Selvin Xavier         2017-02-10  508  	case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
f218d67ef00431 Selvin Xavier         2017-06-29  509  		err_event = (struct creq_qp_error_notification *)qp_event;
f218d67ef00431 Selvin Xavier         2017-06-29  510  		qp_id = le32_to_cpu(err_event->xid);
84cf229f4001c1 Selvin Xavier         2020-08-24  511  		tbl_indx = map_qp_id_to_tbl_indx(qp_id, rcfw);
84cf229f4001c1 Selvin Xavier         2020-08-24  512  		qp = rcfw->qp_tbl[tbl_indx].qp_handle;
cee0c7bba48691 Devesh Sharma         2020-02-15  513  		dev_dbg(&pdev->dev, "Received QP error notification\n");
cee0c7bba48691 Devesh Sharma         2020-02-15  514  		dev_dbg(&pdev->dev,
08920b8f5d2d3b Joe Perches           2018-08-10  515  			"qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
f218d67ef00431 Selvin Xavier         2017-06-29  516  			qp_id, err_event->req_err_state_reason,
f218d67ef00431 Selvin Xavier         2017-06-29  517  			err_event->res_err_state_reason);
d6d5c59905c8af Sriharsha Basavapatna 2017-10-31  518  		if (!qp)
d6d5c59905c8af Sriharsha Basavapatna 2017-10-31  519  			break;
f218d67ef00431 Selvin Xavier         2017-06-29  520  		bnxt_qplib_mark_qp_error(qp);
cee0c7bba48691 Devesh Sharma         2020-02-15  521  		rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp);
1ac5a404797523 Selvin Xavier         2017-02-10  522  		break;
1ac5a404797523 Selvin Xavier         2017-02-10  523  	default:
d455f29f6d76a5 Selvin Xavier         2018-10-08  524  		/*
d455f29f6d76a5 Selvin Xavier         2018-10-08  525  		 * Command Response
d455f29f6d76a5 Selvin Xavier         2018-10-08  526  		 * cmdq->lock needs to be acquired to synchronie
d455f29f6d76a5 Selvin Xavier         2018-10-08  527  		 * the command send and completion reaping. This function
d455f29f6d76a5 Selvin Xavier         2018-10-08  528  		 * is always called with creq->lock held. Using
d455f29f6d76a5 Selvin Xavier         2018-10-08  529  		 * the nested variant of spin_lock.
d455f29f6d76a5 Selvin Xavier         2018-10-08  530  		 *
d455f29f6d76a5 Selvin Xavier         2018-10-08  531  		 */
d455f29f6d76a5 Selvin Xavier         2018-10-08  532  
cee0c7bba48691 Devesh Sharma         2020-02-15  533  		spin_lock_irqsave_nested(&hwq->lock, flags,
d455f29f6d76a5 Selvin Xavier         2018-10-08  534  					 SINGLE_DEPTH_NESTING);
cc1ec769b87c7d Devesh Sharma         2017-05-22  535  		cookie = le16_to_cpu(qp_event->cookie);
cc1ec769b87c7d Devesh Sharma         2017-05-22  536  		mcookie = qp_event->cookie;
1ac5a404797523 Selvin Xavier         2017-02-10  537  		blocked = cookie & RCFW_CMD_IS_BLOCKING;
1ac5a404797523 Selvin Xavier         2017-02-10  538  		cookie &= RCFW_MAX_COOKIE_VALUE;
bd1c24ccf9eb07 Devesh Sharma         2018-12-12  539  		cbit = cookie % rcfw->cmdq_depth;
cc1ec769b87c7d Devesh Sharma         2017-05-22  540  		crsqe = &rcfw->crsqe_tbl[cbit];
cee0c7bba48691 Devesh Sharma         2020-02-15  541  		if (!test_and_clear_bit(cbit, rcfw->cmdq.cmdq_bitmap))
cee0c7bba48691 Devesh Sharma         2020-02-15  542  			dev_warn(&pdev->dev,
08920b8f5d2d3b Joe Perches           2018-08-10  543  				 "CMD bit %d was not requested\n", cbit);
cc1ec769b87c7d Devesh Sharma         2017-05-22  544  
9a38dbbd424de0 Kashyap Desai         2023-06-08  545  		if (crsqe->is_waiter_alive) {
9a38dbbd424de0 Kashyap Desai         2023-06-08  546  			if (crsqe->resp)
9a38dbbd424de0 Kashyap Desai         2023-06-08  547  				memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
1ac5a404797523 Selvin Xavier         2017-02-10  548  			if (!blocked)
1020ce59adaac6 Kashyap Desai         2023-06-08  549  				wait_cmds++;
9a38dbbd424de0 Kashyap Desai         2023-06-08  550  		}
9a38dbbd424de0 Kashyap Desai         2023-06-08  551  
9a38dbbd424de0 Kashyap Desai         2023-06-08  552  		req_size = crsqe->req_size;
9a38dbbd424de0 Kashyap Desai         2023-06-08  553  		is_waiter_alive = crsqe->is_waiter_alive;
9a38dbbd424de0 Kashyap Desai         2023-06-08  554  
9a38dbbd424de0 Kashyap Desai         2023-06-08  555  		crsqe->req_size = 0;
9a38dbbd424de0 Kashyap Desai         2023-06-08  556  		if (!is_waiter_alive)
9a38dbbd424de0 Kashyap Desai         2023-06-08  557  			crsqe->resp = NULL;
9a38dbbd424de0 Kashyap Desai         2023-06-08  558  
9a38dbbd424de0 Kashyap Desai         2023-06-08  559  		hwq->cons += req_size;
cee0c7bba48691 Devesh Sharma         2020-02-15  560  		spin_unlock_irqrestore(&hwq->lock, flags);
1ac5a404797523 Selvin Xavier         2017-02-10  561  	}
1020ce59adaac6 Kashyap Desai         2023-06-08  562  	*num_wait += wait_cmds;
cee0c7bba48691 Devesh Sharma         2020-02-15  563  	return rc;
1ac5a404797523 Selvin Xavier         2017-02-10  564  }
1ac5a404797523 Selvin Xavier         2017-02-10  565
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 3215f8a..e105961 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -216,6 +216,7 @@  static int __send_message(struct bnxt_qplib_rcfw *rcfw,
 	crsqe->free_slots = free_slots;
 	crsqe->resp = (struct creq_qp_event *)msg->resp;
 	crsqe->resp->cookie = cpu_to_le16(cookie);
+	crsqe->is_waiter_alive = true;
 	crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
 	if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
 		struct bnxt_qplib_rcfw_sbuf *sbuf = msg->sb;
@@ -347,7 +348,9 @@  static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 					  struct bnxt_qplib_cmdqmsg *msg)
 {
 	struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
-	u16 cookie;
+	struct bnxt_qplib_crsqe *crsqe;
+	unsigned long flags;
+	u16 cookie, cbit;
 	int rc = 0;
 	u8 opcode;
 
@@ -363,6 +366,7 @@  static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 
 	cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz))
 				& RCFW_MAX_COOKIE_VALUE;
+	cbit = cookie % rcfw->cmdq_depth;
 
 	if (msg->block)
 		rc = __block_for_resp(rcfw, cookie, opcode);
@@ -378,6 +382,14 @@  static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 		return rc;
 	}
 
+	if (rc) {
+		spin_lock_irqsave(&rcfw->cmdq.hwq.lock, flags);
+		crsqe = &rcfw->crsqe_tbl[cbit];
+		crsqe->is_waiter_alive = false;
+		spin_unlock_irqrestore(&rcfw->cmdq.hwq.lock, flags);
+		return -ETIMEDOUT;
+	}
+
 	if (evnt->status) {
 		/* failed with status */
 		dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
@@ -480,15 +492,16 @@  static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 	struct creq_qp_error_notification *err_event;
 	struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
 	struct bnxt_qplib_crsqe *crsqe;
+	u32 qp_id, tbl_indx, req_size;
 	struct bnxt_qplib_qp *qp;
 	u16 cbit, blocked = 0;
+	bool is_waiter_alive;
 	struct pci_dev *pdev;
 	unsigned long flags;
 	u32 wait_cmds = 0;
 	__le16  mcookie;
 	u16 cookie;
 	int rc = 0;
-	u32 qp_id, tbl_indx;
 
 	pdev = rcfw->pdev;
 	switch (qp_event->event) {
@@ -525,26 +538,25 @@  static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
 		cookie &= RCFW_MAX_COOKIE_VALUE;
 		cbit = cookie % rcfw->cmdq_depth;
 		crsqe = &rcfw->crsqe_tbl[cbit];
-		if (crsqe->resp &&
-		    crsqe->resp->cookie  == mcookie) {
-			memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
-			crsqe->resp = NULL;
-		} else {
-			if (crsqe->resp && crsqe->resp->cookie)
-				dev_err(&pdev->dev,
-					"CMD %s cookie sent=%#x, recd=%#x\n",
-					crsqe->resp ? "mismatch" : "collision",
-					crsqe->resp ? crsqe->resp->cookie : 0,
-					mcookie);
-		}
 		if (!test_and_clear_bit(cbit, rcfw->cmdq.cmdq_bitmap))
 			dev_warn(&pdev->dev,
 				 "CMD bit %d was not requested\n", cbit);
-		hwq->cons += crsqe->req_size;
+
+		if (crsqe->is_waiter_alive) {
+			if (crsqe->resp)
+				memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+			if (!blocked)
+				wait_cmds++;
+		}
+
+		req_size = crsqe->req_size;
+		is_waiter_alive = crsqe->is_waiter_alive;
+
 		crsqe->req_size = 0;
+		if (!is_waiter_alive)
+			crsqe->resp = NULL;
 
-		if (!blocked)
-			wait_cmds++;
+		hwq->cons += req_size;
 		spin_unlock_irqrestore(&hwq->lock, flags);
 	}
 	*num_wait += wait_cmds;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 089e616..6ed81c1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -152,6 +152,7 @@  struct bnxt_qplib_crsqe {
 	u32			req_size;
 	/* Free slots at the time of submission */
 	u32			free_slots;
+	bool			is_waiter_alive;
 };
 
 struct bnxt_qplib_rcfw_sbuf {