diff mbox

[06/10] lpfc: Check if SCSI scanning is complete, before allowing I/O commands

Message ID 20180505033759.6715-7-jsmart2021@gmail.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

James Smart May 5, 2018, 3:37 a.m. UTC
A race condition is being seen. I/O is being issued to scsi targets
simultaneous to a short connectivity loss. The rport block/unblock is
occurring, but the unblock is allowing pending I/O commands to intermix
with the commands being used to scan the scsi bus. Thus the target
device is getting a read or write prior to transitioning to an ready
state, thus the device errors the io. The io error propagatest all the
way back to the application.

Resolve by augmenting the io submission with a check to see whether
transport is scanning the remote port or not. If not, then the io is
failed such that it will be retried. If so, then normal processing resumes.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
---
 drivers/scsi/lpfc/lpfc_scsi.c | 48 +++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/lpfc/lpfc_scsi.h | 15 ++++++++++++++
 2 files changed, 63 insertions(+)

Comments

Martin K. Petersen May 8, 2018, 5:18 a.m. UTC | #1
James,

Not a fan of these:

> +static int
> +lpfc_is_scan_cmd(uint8_t opcode)
> +{
> +	switch (opcode & 0x1f) {
> +	case LPFC_INQUIRY_CMD_CODE:
> +	case LPFC_LOG_SELECT_CMD_CODE:
> +	case LPFC_LOG_SENSE_CMD_CODE:

Especially since they are masked off twice:

> +#define LPFC_INQUIRY_CMD_CODE			(INQUIRY & 0x1f)
> +#define LPFC_LOG_SELECT_CMD_CODE		(LOG_SELECT & 0x1f)
> +#define LPFC_LOG_SENSE_CMD_CODE			(LOG_SENSE &
> 0x1f)
Ewan Milne May 9, 2018, 3:13 p.m. UTC | #2
On Fri, 2018-05-04 at 20:37 -0700, James Smart wrote:
> A race condition is being seen. I/O is being issued to scsi targets
> simultaneous to a short connectivity loss. The rport block/unblock is
> occurring, but the unblock is allowing pending I/O commands to intermix
> with the commands being used to scan the scsi bus. Thus the target
> device is getting a read or write prior to transitioning to an ready
> state, thus the device errors the io. The io error propagatest all the
> way back to the application.
> 
> Resolve by augmenting the io submission with a check to see whether
> transport is scanning the remote port or not. If not, then the io is
> failed such that it will be retried. If so, then normal processing resumes.

This seems like a hack to me.  What exactly is the problem, is the
target rejecting the READ and/or WRITE commands for some reason?
Can you explain what you mean by "..transitioning to an ready state"?

Perhaps this would be better handled by some kind of interlock in
the transport layer with the scan_work?

-Ewan
diff mbox

Patch

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 7932bf30c8d7..303f1ad0c27f 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -4506,6 +4506,43 @@  void lpfc_poll_timeout(struct timer_list *t)
 }
 
 /**
+ * lpfc_is_scan_cmd - check command code within opcode for scanning related cmd
+ * @opcode: opcode of scsi_cmnd data structure.
+ *
+ * scsi midlayer may submit I/O commands before scanning is complete caused by
+ * lip in transport layer.
+ * This routine checks if the scsi cmd is related to scanning following FCP-4
+ * discovery procedure.
+ *
+ * Return value :
+ *   0 - Not a scanning related command
+ *   1 - Is a scanning related command
+ **/
+static int
+lpfc_is_scan_cmd(uint8_t opcode)
+{
+	switch (opcode & 0x1f) {
+	case LPFC_INQUIRY_CMD_CODE:
+	case LPFC_LOG_SELECT_CMD_CODE:
+	case LPFC_LOG_SENSE_CMD_CODE:
+	case LPFC_MODE_SELECT_CMD_CODE:
+	case LPFC_MODE_SENSE_CMD_CODE:
+	case LPFC_REPORT_LUNS_CMD_CODE:
+	case LPFC_SEND_DIAGNOSTIC_CMD_CODE:
+	case LPFC_MAINTENANCE_IN_CMD_CODE:
+	case LPFC_MAINTENANCE_OUT_CMD_CODE:
+	case LPFC_PERSISTENT_RESERVE_IN_CMD_CODE:
+	case LPFC_PERSISTENT_RESERVE_OUT_CMD_CODE:
+	case LPFC_READ_BUFFER_CMD_CODE:
+	case LPFC_WRITE_BUFFER_CMD_CODE:
+		break;
+	default:
+		return 0;
+	}
+	return 1;
+}
+
+/**
  * lpfc_queuecommand - scsi_host_template queuecommand entry point
  * @cmnd: Pointer to scsi_cmnd data structure.
  * @done: Pointer to done routine.
@@ -4537,6 +4574,17 @@  lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
 	}
 	ndlp = rdata->pnode;
 
+	if ((rport->flags & FC_RPORT_SCAN_PENDING) &&
+	    !(lpfc_is_scan_cmd(cmnd->cmnd[0]))) {
+		lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
+				 "6423 scan still pending, "
+				 "command = 0x%02x to be "
+				 "retried\n",
+				 cmnd->cmnd[0]);
+		cmnd->result = DID_IMM_RETRY << 16;
+		goto out_fail_command;
+	}
+
 	if ((scsi_get_prot_op(cmnd) != SCSI_PROT_NORMAL) &&
 		(!(phba->sli3_options & LPFC_SLI3_BG_ENABLED))) {
 
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index 8e38e0204c47..68c8a24c659c 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -194,5 +194,20 @@  struct lpfc_scsi_buf {
 
 #define TXRDY_PAYLOAD_LEN	12
 
+/* Safe command codes for scanning, excluding group code */
+#define LPFC_INQUIRY_CMD_CODE			(INQUIRY & 0x1f)
+#define LPFC_LOG_SELECT_CMD_CODE		(LOG_SELECT & 0x1f)
+#define LPFC_LOG_SENSE_CMD_CODE			(LOG_SENSE & 0x1f)
+#define LPFC_MODE_SELECT_CMD_CODE		(MODE_SELECT & 0x1f)
+#define LPFC_MODE_SENSE_CMD_CODE		(MODE_SENSE & 0x1f)
+#define LPFC_REPORT_LUNS_CMD_CODE		(REPORT_LUNS & 0x1f)
+#define LPFC_SEND_DIAGNOSTIC_CMD_CODE		(SEND_DIAGNOSTIC & 0x1f)
+#define LPFC_MAINTENANCE_IN_CMD_CODE		(MAINTENANCE_IN & 0x1f)
+#define LPFC_MAINTENANCE_OUT_CMD_CODE		(MAINTENANCE_OUT & 0x1f)
+#define LPFC_PERSISTENT_RESERVE_IN_CMD_CODE	(PERSISTENT_RESERVE_IN & 0x1f)
+#define LPFC_PERSISTENT_RESERVE_OUT_CMD_CODE	(PERSISTENT_RESERVE_OUT & 0x1f)
+#define LPFC_READ_BUFFER_CMD_CODE		(READ_BUFFER & 0x1f)
+#define LPFC_WRITE_BUFFER_CMD_CODE		(WRITE_BUFFER & 0x1f)
+
 int lpfc_sli4_scmd_to_wqidx_distr(struct lpfc_hba *phba,
 				  struct lpfc_scsi_buf *lpfc_cmd);