diff mbox series

[v3,1/7] qedf: Keep track of num of pending flogi.

Message ID 20200403120957.2431-2-skashyap@marvell.com (mailing list archive)
State Superseded
Headers show
Series qed/qedf: Firmware recovery, bw update and misc fixes. | expand

Commit Message

Saurav Kashyap April 3, 2020, 12:09 p.m. UTC
- Problem: The port does not come up after it has been brought
  down for a long duration.
- Bring down the port from the switch.
- Wait for the fipvlan retries to be exhausted; the driver will
  use the default VLAN (1002) and call fcoe_ctlr_link_up.
- libfc/fcoe will start sending FLOGI.
- Bring the port back up; the switch discards the FLOGI
  because the VLAN is different.
- Fix: Keep track of pending FLOGIs and, if the count reaches
  a certain number, do a ctx reset, which will do
  fipvlan again.

Signed-off-by: Saurav Kashyap <skashyap@marvell.com>
---
 drivers/scsi/qedf/qedf.h      |  2 ++
 drivers/scsi/qedf/qedf_main.c | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+)

Comments

Martin K. Petersen April 14, 2020, 1:19 a.m. UTC | #1
Saurav,

Please, no "." at the end of Subject: lines.

> - Problem: Port not coming up after bringing down the port
>   for longer duration.
> - Bring down the port from the switch
> - wait for fipvlan to exhaust, driver will use
>   default vlan (1002) and call fcoe_ctlr_link_up
> - libfc/fcoe will start sending FLOGI
> - bring back the port and switch discard FLOGI
>   because vlan is different.
> - keep track of pending flogi and if it increases
>   certain number then do ctx reset and it will do
>   fipvlan again.

That doesn't look like a proper commit message.

How about something like:

    If a port is brought down for an extended period of time, the
    fipvlan counter gets exhausted and the driver will fall back to
    default VLAN 1002 and call fcoe_ctlr_link_up to log in. However, the
    switch will discard the FLOGI attempt because the VLAN is now
    different.

    Keep track of the number of FLOGI attempts and if a threshold of
    QEDF_FLOGI_RETRY_CNT is exceeded, perform a context soft reset.
Saurav Kashyap April 14, 2020, 4:13 a.m. UTC | #2
Hi Martin,

> -----Original Message-----
> From: Martin K. Petersen <martin.petersen@oracle.com>
> Sent: Tuesday, April 14, 2020 6:50 AM
> To: Saurav Kashyap <skashyap@marvell.com>
> Cc: martin.petersen@oracle.com; GR-QLogic-Storage-Upstream <GR-QLogic-
> Storage-Upstream@marvell.com>; linux-scsi@vger.kernel.org; Javed Hasan
> <jhasan@marvell.com>; netdev@vger.kernel.org
> Subject: [EXT] Re: [PATCH v3 1/7] qedf: Keep track of num of pending flogi.
> 
> External Email
> 
> ----------------------------------------------------------------------
> 
> Saurav,
> 
> Please, no "." at the end of Subject: lines.
> 
> > - Problem: Port not coming up after bringing down the port
> >   for longer duration.
> > - Bring down the port from the switch
> > - wait for fipvlan to exhaust, driver will use
> >   default vlan (1002) and call fcoe_ctlr_link_up
> > - libfc/fcoe will start sending FLOGI
> > - bring back the port and switch discard FLOGI
> >   because vlan is different.
> > - keep track of pending flogi and if it increases
> >   certain number then do ctx reset and it will do
> >   fipvlan again.
> 
> That doesn't look like a proper commit message.
> 
> How about something like:
> 
>     If a port is brought down for an extended period of time, the
>     fipvlan counter gets exhausted and the driver will fall back to
>     default VLAN 1002 and call fcoe_ctlr_link_up to log in. However, the
>     switch will discard the FLOGI attempt because the VLAN is now
>     different.
> 
>     Keep track of the number of FLOGI attempts and if a threshold of
>     QEDF_FLOGI_RETRY_CNT is exceeded, perform a context soft reset.

<SK> Looks good.

Thanks,
~Saurav
> 
> --
> Martin K. Petersen	Oracle Linux Engineering
diff mbox series

Patch

diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h
index f3f399f..042ebf6 100644
--- a/drivers/scsi/qedf/qedf.h
+++ b/drivers/scsi/qedf/qedf.h
@@ -388,6 +388,7 @@  struct qedf_ctx {
 	mempool_t *io_mempool;
 	struct workqueue_struct *dpc_wq;
 	struct delayed_work grcdump_work;
+	struct delayed_work stag_work;
 
 	u32 slow_sge_ios;
 	u32 fast_sge_ios;
@@ -403,6 +404,7 @@  struct qedf_ctx {
 
 	u32 flogi_cnt;
 	u32 flogi_failed;
+	u32 flogi_pending;
 
 	/* Used for fc statistics */
 	struct mutex stats_mutex;
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 604856e..ee468102 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -282,6 +282,7 @@  static void qedf_flogi_resp(struct fc_seq *seq, struct fc_frame *fp,
 	else if (fc_frame_payload_op(fp) == ELS_LS_ACC) {
 		/* Set the source MAC we will use for FCoE traffic */
 		qedf_set_data_src_addr(qedf, fp);
+		qedf->flogi_pending = 0;
 	}
 
 	/* Complete flogi_compl so we can proceed to sending ADISCs */
@@ -307,6 +308,11 @@  static struct fc_seq *qedf_elsct_send(struct fc_lport *lport, u32 did,
 	 */
 	if (resp == fc_lport_flogi_resp) {
 		qedf->flogi_cnt++;
+		if (qedf->flogi_pending >= QEDF_FLOGI_RETRY_CNT) {
+			schedule_delayed_work(&qedf->stag_work, 2);
+			return NULL;
+		}
+		qedf->flogi_pending++;
 		return fc_elsct_send(lport, did, fp, op, qedf_flogi_resp,
 		    arg, timeout);
 	}
@@ -850,6 +856,7 @@  void qedf_ctx_soft_reset(struct fc_lport *lport)
 
 	qedf = lport_priv(lport);
 
+	qedf->flogi_pending = 0;
 	/* For host reset, essentially do a soft link up/down */
 	atomic_set(&qedf->link_state, QEDF_LINK_DOWN);
 	QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
@@ -3205,6 +3212,7 @@  static int __qedf_probe(struct pci_dev *pdev, int mode)
 		init_completion(&qedf->fipvlan_compl);
 		mutex_init(&qedf->stats_mutex);
 		mutex_init(&qedf->flush_mutex);
+		qedf->flogi_pending = 0;
 
 		QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO,
 		   "QLogic FastLinQ FCoE Module qedf %s, "
@@ -3235,6 +3243,7 @@  static int __qedf_probe(struct pci_dev *pdev, int mode)
 	INIT_DELAYED_WORK(&qedf->link_update, qedf_handle_link_update);
 	INIT_DELAYED_WORK(&qedf->link_recovery, qedf_link_recovery);
 	INIT_DELAYED_WORK(&qedf->grcdump_work, qedf_wq_grcdump);
+	INIT_DELAYED_WORK(&qedf->stag_work, qedf_stag_change_work);
 	qedf->fipvlan_retries = qedf_fipvlan_retries;
 	/* Set a default prio in case DCBX doesn't converge */
 	if (qedf_default_prio > -1) {
@@ -3770,6 +3779,20 @@  void qedf_get_protocol_tlv_data(void *dev, void *data)
 	fcoe->scsi_tsk_full = qedf->task_set_fulls;
 }
 
+/* Delayed-work handler for stag_work: soft-resets the owning qedf context. */
+void qedf_stag_change_work(struct work_struct *work)
+{
+	struct qedf_ctx *qedf =
+	    container_of(work, struct qedf_ctx, stag_work.work);
+
+	if (!qedf) {	/* no context: must not touch qedf->dbg_ctx here */
+		QEDF_ERR(NULL, "qedf is NULL\n");
+		return;
+	}
+	QEDF_ERR(&qedf->dbg_ctx, "Performing software context reset.\n");
+	qedf_ctx_soft_reset(qedf->lport);
+}
+
 static void qedf_shutdown(struct pci_dev *pdev)
 {
 	__qedf_remove(pdev, QEDF_MODE_NORMAL);