diff mbox series

[v2] scsi: qedi: Add support for fastpath doorvell recovery

Message ID 20210804221412.5048-1-smalin@marvell.com (mailing list archive)
State Accepted
Headers show
Series [v2] scsi: qedi: Add support for fastpath doorvell recovery | expand

Commit Message

Shai Malin Aug. 4, 2021, 10:14 p.m. UTC
Driver fastpath employs doorbells to indicate to the device that work
is available. Each doorbell translates to a message sent to the device
over the pci. These messages are queued by the doorbell queue HW block,
and handled by the HW.
If a sufficient amount of CPU cores are sending messages at a sufficient
rate, the queue can overflow, and messages can be dropped. There are
many entities in the driver which can send doorbell messages.
When overflow happens, a fatal HW attention is indicated, and the
Doorbell HW block stops accepting new doorbell messages until recovery
procedure is done.

When overflow occurs, all doorbells are dropped. Since doorbells are
aggregatives, if more doorbells are sent nothing has to be done.
But if the "last" doorbell is dropped, the doorbelling entity doesn’t know
this happened, and may wait forever for the device to perform the action.
The doorbell recovery mechanism addresses just that - it sends the last
doorbell of every entity.

Signed-off-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
---
 drivers/scsi/qedi/qedi_fw.c    | 14 ++++----------
 drivers/scsi/qedi/qedi_iscsi.c | 33 ++++++++++++++++++++++++++++++++-
 drivers/scsi/qedi/qedi_iscsi.h |  1 +
 3 files changed, 37 insertions(+), 11 deletions(-)

Comments

Martin K. Petersen Aug. 10, 2021, 3:56 a.m. UTC | #1
Shai,

> Driver fastpath employs doorbells to indicate to the device that work
> is available. Each doorbell translates to a message sent to the device
> over the pci. These messages are queued by the doorbell queue HW
> block, and handled by the HW.

Applied to 5.15/scsi-staging, thanks!
Guenter Roeck Aug. 11, 2021, 3:47 p.m. UTC | #2
On Thu, Aug 05, 2021 at 01:14:12AM +0300, Shai Malin wrote:
> Driver fastpath employs doorbells to indicate to the device that work
> is available. Each doorbell translates to a message sent to the device
> over the pci. These messages are queued by the doorbell queue HW block,
> and handled by the HW.
> If a sufficient amount of CPU cores are sending messages at a sufficient
> rate, the queue can overflow, and messages can be dropped. There are
> many entities in the driver which can send doorbell messages.
> When overflow happens, a fatal HW attention is indicated, and the
> Doorbell HW block stops accepting new doorbell messages until recovery
> procedure is done.
> 
> When overflow occurs, all doorbells are dropped. Since doorbells are
> aggregatives, if more doorbells are sent nothing has to be done.
> But if the "last" doorbell is dropped, the doorbelling entity doesn’t know
> this happened, and may wait forever for the device to perform the action.
> The doorbell recovery mechanism addresses just that - it sends the last
> doorbell of every entity.
> 
> Signed-off-by: Manish Rangankar <mrangankar@marvell.com>
> Signed-off-by: Shai Malin <smalin@marvell.com>
> ---
>  drivers/scsi/qedi/qedi_fw.c    | 14 ++++----------
>  drivers/scsi/qedi/qedi_iscsi.c | 33 ++++++++++++++++++++++++++++++++-
>  drivers/scsi/qedi/qedi_iscsi.h |  1 +
>  3 files changed, 37 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
> index 71333d3c5c86..e7064f62c833 100644
> --- a/drivers/scsi/qedi/qedi_fw.c
> +++ b/drivers/scsi/qedi/qedi_fw.c
> @@ -936,17 +936,11 @@ void qedi_fp_process_cqes(struct qedi_work *work)
>  
>  static void qedi_ring_doorbell(struct qedi_conn *qedi_conn)
>  {
> -	struct iscsi_db_data dbell = { 0 };
> +	qedi_conn->ep->db_data.sq_prod = qedi_conn->ep->fw_sq_prod_idx;
>  
> -	dbell.agg_flags = 0;
> -
> -	dbell.params |= DB_DEST_XCM << ISCSI_DB_DATA_DEST_SHIFT;
> -	dbell.params |= DB_AGG_CMD_SET << ISCSI_DB_DATA_AGG_CMD_SHIFT;
> -	dbell.params |=
> -		   DQ_XCM_ISCSI_SQ_PROD_CMD << ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT;
> -
> -	dbell.sq_prod = qedi_conn->ep->fw_sq_prod_idx;
> -	writel(*(u32 *)&dbell, qedi_conn->ep->p_doorbell);
> +	/* wmb - Make sure fw idx is coherent */
> +	wmb();
> +	writel(*(u32 *)&qedi_conn->ep->db_data, qedi_conn->ep->p_doorbell);
>  
>  	/* Make sure fw write idx is coherent, and include both memory barriers
>  	 * as a failsafe as for some architectures the call is the same but on
> diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
> index 97f83760da88..8ac8aabc1ef6 100644
> --- a/drivers/scsi/qedi/qedi_iscsi.c
> +++ b/drivers/scsi/qedi/qedi_iscsi.c
> @@ -499,8 +499,8 @@ static u16 qedi_calc_mss(u16 pmtu, u8 is_ipv6, u8 tcp_ts_en, u8 vlan_en)
>  
>  static int qedi_iscsi_offload_conn(struct qedi_endpoint *qedi_ep)
>  {
> -	struct qedi_ctx *qedi = qedi_ep->qedi;
>  	struct qed_iscsi_params_offload *conn_info;
> +	struct qedi_ctx *qedi = qedi_ep->qedi;
>  	int rval;
>  	int i;
>  
> @@ -577,8 +577,34 @@ static int qedi_iscsi_offload_conn(struct qedi_endpoint *qedi_ep)
>  		  "Default cq index [%d], mss [%d]\n",
>  		  conn_info->default_cq, conn_info->mss);
>  
> +	/* Prepare the doorbell parameters */
> +	qedi_ep->db_data.agg_flags = 0;
> +	qedi_ep->db_data.params = 0;
> +	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_DEST, DB_DEST_XCM);
> +	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_AGG_CMD,
> +		  DB_AGG_CMD_MAX);
> +	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_AGG_VAL_SEL,
> +		  DQ_XCM_ISCSI_SQ_PROD_CMD);
> +	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_BYPASS_EN, 1);
> +
> +	/* Register doorbell with doorbell recovery mechanism */
> +	rval = qedi_ops->common->db_recovery_add(qedi->cdev,
> +						 qedi_ep->p_doorbell,
> +						 &qedi_ep->db_data,
> +						 DB_REC_WIDTH_32B,
> +						 DB_REC_KERNEL);
> +	if (rval) {
> +		kfree(conn_info);
> +		return rval;
> +	}
> +
>  	rval = qedi_ops->offload_conn(qedi->cdev, qedi_ep->handle, conn_info);
>  	if (rval)
> +		/* delete doorbell from doorbell recovery mechanism */
> +		rval = qedi_ops->common->db_recovery_del(qedi->cdev,
> +							 qedi_ep->p_doorbell,
> +							 &qedi_ep->db_data);
> +
>  		QEDI_ERR(&qedi->dbg_ctx, "offload_conn returned %d, ep=%p\n",
>  			 rval, qedi_ep);

Due to missing { } this will now always result in QEDI_ERR() execution
(and possibly a C compiler warning).

Guenter

>  
> @@ -1109,6 +1135,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
>  	    test_bit(QEDI_IN_RECOVERY, &qedi->flags))
>  		goto ep_release_conn;
>  
> +	/* Delete doorbell from doorbell recovery mechanism */
> +	ret = qedi_ops->common->db_recovery_del(qedi->cdev,
> +					       qedi_ep->p_doorbell,
> +					       &qedi_ep->db_data);
> +
>  	ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn);
>  	if (ret) {
>  		QEDI_WARN(&qedi->dbg_ctx,
> diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h
> index 758735209e15..a31c5de74754 100644
> --- a/drivers/scsi/qedi/qedi_iscsi.h
> +++ b/drivers/scsi/qedi/qedi_iscsi.h
> @@ -80,6 +80,7 @@ struct qedi_endpoint {
>  	u32 handle;
>  	u32 fw_cid;
>  	void __iomem *p_doorbell;
> +	struct iscsi_db_data db_data;
>  
>  	/* Send queue management */
>  	struct iscsi_wqe *sq;
> -- 
> 2.22.0
>
Martin K. Petersen Aug. 12, 2021, 2:52 a.m. UTC | #3
Guenter,

>>  	rval = qedi_ops->offload_conn(qedi->cdev, qedi_ep->handle, conn_info);
>>  	if (rval)
>> +		/* delete doorbell from doorbell recovery mechanism */
>> +		rval = qedi_ops->common->db_recovery_del(qedi->cdev,
>> +							 qedi_ep->p_doorbell,
>> +							 &qedi_ep->db_data);
>> +
>>  		QEDI_ERR(&qedi->dbg_ctx, "offload_conn returned %d, ep=%p\n",
>>  			 rval, qedi_ep);
>
> Due to missing { } this will now always result in QEDI_ERR() execution
> (and possibly a C compiler warning).

Fixed up. Thanks for spotting!
Martin K. Petersen Aug. 17, 2021, 3:17 a.m. UTC | #4
On Thu, 5 Aug 2021 01:14:12 +0300, Shai Malin wrote:

> Driver fastpath employs doorbells to indicate to the device that work
> is available. Each doorbell translates to a message sent to the device
> over the pci. These messages are queued by the doorbell queue HW block,
> and handled by the HW.
> If a sufficient amount of CPU cores are sending messages at a sufficient
> rate, the queue can overflow, and messages can be dropped. There are
> many entities in the driver which can send doorbell messages.
> When overflow happens, a fatal HW attention is indicated, and the
> Doorbell HW block stops accepting new doorbell messages until recovery
> procedure is done.
> 
> [...]

Applied to 5.15/scsi-queue, thanks!

[1/1] scsi: qedi: Add support for fastpath doorvell recovery
      https://git.kernel.org/mkp/scsi/c/9757f8af0442
diff mbox series

Patch

diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 71333d3c5c86..e7064f62c833 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -936,17 +936,11 @@  void qedi_fp_process_cqes(struct qedi_work *work)
 
 static void qedi_ring_doorbell(struct qedi_conn *qedi_conn)
 {
-	struct iscsi_db_data dbell = { 0 };
+	qedi_conn->ep->db_data.sq_prod = qedi_conn->ep->fw_sq_prod_idx;
 
-	dbell.agg_flags = 0;
-
-	dbell.params |= DB_DEST_XCM << ISCSI_DB_DATA_DEST_SHIFT;
-	dbell.params |= DB_AGG_CMD_SET << ISCSI_DB_DATA_AGG_CMD_SHIFT;
-	dbell.params |=
-		   DQ_XCM_ISCSI_SQ_PROD_CMD << ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT;
-
-	dbell.sq_prod = qedi_conn->ep->fw_sq_prod_idx;
-	writel(*(u32 *)&dbell, qedi_conn->ep->p_doorbell);
+	/* wmb - Make sure fw idx is coherent */
+	wmb();
+	writel(*(u32 *)&qedi_conn->ep->db_data, qedi_conn->ep->p_doorbell);
 
 	/* Make sure fw write idx is coherent, and include both memory barriers
 	 * as a failsafe as for some architectures the call is the same but on
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index 97f83760da88..8ac8aabc1ef6 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -499,8 +499,8 @@  static u16 qedi_calc_mss(u16 pmtu, u8 is_ipv6, u8 tcp_ts_en, u8 vlan_en)
 
 static int qedi_iscsi_offload_conn(struct qedi_endpoint *qedi_ep)
 {
-	struct qedi_ctx *qedi = qedi_ep->qedi;
 	struct qed_iscsi_params_offload *conn_info;
+	struct qedi_ctx *qedi = qedi_ep->qedi;
 	int rval;
 	int i;
 
@@ -577,8 +577,34 @@  static int qedi_iscsi_offload_conn(struct qedi_endpoint *qedi_ep)
 		  "Default cq index [%d], mss [%d]\n",
 		  conn_info->default_cq, conn_info->mss);
 
+	/* Prepare the doorbell parameters */
+	qedi_ep->db_data.agg_flags = 0;
+	qedi_ep->db_data.params = 0;
+	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_DEST, DB_DEST_XCM);
+	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_AGG_CMD,
+		  DB_AGG_CMD_MAX);
+	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_AGG_VAL_SEL,
+		  DQ_XCM_ISCSI_SQ_PROD_CMD);
+	SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_BYPASS_EN, 1);
+
+	/* Register doorbell with doorbell recovery mechanism */
+	rval = qedi_ops->common->db_recovery_add(qedi->cdev,
+						 qedi_ep->p_doorbell,
+						 &qedi_ep->db_data,
+						 DB_REC_WIDTH_32B,
+						 DB_REC_KERNEL);
+	if (rval) {
+		kfree(conn_info);
+		return rval;
+	}
+
 	rval = qedi_ops->offload_conn(qedi->cdev, qedi_ep->handle, conn_info);
 	if (rval)
+		/* delete doorbell from doorbell recovery mechanism */
+		rval = qedi_ops->common->db_recovery_del(qedi->cdev,
+							 qedi_ep->p_doorbell,
+							 &qedi_ep->db_data);
+
 		QEDI_ERR(&qedi->dbg_ctx, "offload_conn returned %d, ep=%p\n",
 			 rval, qedi_ep);
 
@@ -1109,6 +1135,11 @@  static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
 	    test_bit(QEDI_IN_RECOVERY, &qedi->flags))
 		goto ep_release_conn;
 
+	/* Delete doorbell from doorbell recovery mechanism */
+	ret = qedi_ops->common->db_recovery_del(qedi->cdev,
+					       qedi_ep->p_doorbell,
+					       &qedi_ep->db_data);
+
 	ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn);
 	if (ret) {
 		QEDI_WARN(&qedi->dbg_ctx,
diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h
index 758735209e15..a31c5de74754 100644
--- a/drivers/scsi/qedi/qedi_iscsi.h
+++ b/drivers/scsi/qedi/qedi_iscsi.h
@@ -80,6 +80,7 @@  struct qedi_endpoint {
 	u32 handle;
 	u32 fw_cid;
 	void __iomem *p_doorbell;
+	struct iscsi_db_data db_data;
 
 	/* Send queue management */
 	struct iscsi_wqe *sq;