diff mbox series

[v3,1/4] scsi: add expecting_media_change flag to error path

Message ID 20210328102531.1114535-2-martin.kepplinger@puri.sm (mailing list archive)
State Not Applicable, archived
Headers show
Series scsi: add runtime PM workaround for SD cardreaders | expand

Commit Message

Martin Kepplinger March 28, 2021, 10:25 a.m. UTC
SD Cardreaders (especially) sometimes lose the state during suspend
and deliver a "media changed" unit attention when really only a
(runtime) suspend/resume cycle has been done.

For such devices, I/O fails when runtime PM is enabled, see below.
That's the motivation for adding this flag. If set by a driver, the
next MEDIA CHANGE unit attention will be ignored.

Signed-off-by: Martin Kepplinger <martin.kepplinger@puri.sm>
---
 drivers/scsi/scsi_error.c  | 36 +++++++++++++++++++++++++++++++-----
 include/scsi/scsi_device.h |  1 +
 2 files changed, 32 insertions(+), 5 deletions(-)

Comments

Bart Van Assche March 28, 2021, 4:53 p.m. UTC | #1
On 3/28/21 3:25 AM, Martin Kepplinger wrote:
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 08c06c56331c..c62915d34ba4 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -585,6 +585,18 @@ int scsi_check_sense(struct scsi_cmnd *scmd)
>  				return NEEDS_RETRY;
>  			}
>  		}
> +		if (scmd->device->expecting_media_change) {
> +			if (sshdr.asc == 0x28 && sshdr.ascq == 0x00) {
> +				/*
> +				 * clear the expecting_media_change in
> +				 * scsi_decide_disposition() because we
> +				 * need to catch possible "fail fast" overrides
> +				 * that block readahead can cause.
> +				 */
> +				return NEEDS_RETRY;
> +			}
> +		}

Introducing a new state variable carries some risk, namely that a path
that should set or clear the state variable is overlooked. Is there an
approach that does not require introducing a new state variable, e.g.
sending a REQUEST SENSE command after a resume?

Thanks,

Bart.
Martin Kepplinger March 29, 2021, 8:05 a.m. UTC | #2
Am Sonntag, dem 28.03.2021 um 09:53 -0700 schrieb Bart Van Assche:
> On 3/28/21 3:25 AM, Martin Kepplinger wrote:
> > diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> > index 08c06c56331c..c62915d34ba4 100644
> > --- a/drivers/scsi/scsi_error.c
> > +++ b/drivers/scsi/scsi_error.c
> > @@ -585,6 +585,18 @@ int scsi_check_sense(struct scsi_cmnd *scmd)
> >                                 return NEEDS_RETRY;
> >                         }
> >                 }
> > +               if (scmd->device->expecting_media_change) {
> > +                       if (sshdr.asc == 0x28 && sshdr.ascq ==
> > 0x00) {
> > +                               /*
> > +                                * clear the expecting_media_change
> > in
> > +                                * scsi_decide_disposition()
> > because we
> > +                                * need to catch possible "fail
> > fast" overrides
> > +                                * that block readahead can cause.
> > +                                */
> > +                               return NEEDS_RETRY;
> > +                       }
> > +               }
> 
> Introducing a new state variable carries some risk, namely that a
> path
> that should set or clear the state variable is overlooked. Is there
> an
> approach that does not require to introduce a new state variable,
> e.g.
> to send a REQUEST SENSE command after a resume?
> 
> Thanks,
> 
> Bart.

Wow, thanks for that. Indeed, my first tests succeed with the change
below, which doesn't use the error-path additions at all (not setting
expecting_media_change) and sends a REQUEST SENSE instead.

I'm just too little of a SCSI developer to know whether the change
below correctly does what you had in mind. Does it?


--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3707,6 +3707,10 @@ static int sd_resume_runtime(struct device *dev)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
        struct scsi_device *sdp = sdkp->device;
+       const int timeout = sdp->request_queue->rq_timeout
+               * SD_FLUSH_TIMEOUT_MULTIPLIER;
+       int retries, res;
+       struct scsi_sense_hdr my_sshdr;
        int ret;
 
        if (!sdkp)      /* E.g.: runtime resume at the start of
sd_probe() */
@@ -3714,10 +3718,25 @@ static int sd_resume_runtime(struct device
*dev)
 
        /*
         * This devices issues a MEDIA CHANGE unit attention when
-        * resuming from suspend. Ignore the next one now.
+        * resuming from suspend.
         */
-       if (sdp->sdev_bflags & BLIST_MEDIA_CHANGE)
-               sdkp->device->expecting_media_change = 1;
+       if (sdp->sdev_bflags & BLIST_MEDIA_CHANGE) {
+               for (retries = 3; retries > 0; --retries) {
+                       unsigned char cmd[10] = { 0 };
+
+                       cmd[0] = REQUEST_SENSE;
+                       /*
+                        * Leave the rest of the command zero to
indicate
+                        * flush everything.
+                        */
+                       res = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0,
NULL, &my_sshdr,
+                                       timeout, sdkp->max_retries, 0,
RQF_PM, NULL);
+                       if (res == 0)
+                               break;
+               }
+       }
 
        return sd_resume(dev);
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 08c06c56331c..c62915d34ba4 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -585,6 +585,18 @@  int scsi_check_sense(struct scsi_cmnd *scmd)
 				return NEEDS_RETRY;
 			}
 		}
+		if (scmd->device->expecting_media_change) {
+			if (sshdr.asc == 0x28 && sshdr.ascq == 0x00) {
+				/*
+				 * clear the expecting_media_change in
+				 * scsi_decide_disposition() because we
+				 * need to catch possible "fail fast" overrides
+				 * that block readahead can cause.
+				 */
+				return NEEDS_RETRY;
+			}
+		}
+
 		/*
 		 * we might also expect a cc/ua if another LUN on the target
 		 * reported a UA with an ASC/ASCQ of 3F 0E -
@@ -1977,14 +1989,28 @@  int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	 * the request was not marked fast fail.  Note that above,
 	 * even if the request is marked fast fail, we still requeue
 	 * for queue congestion conditions (QUEUE_FULL or BUSY) */
-	if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) {
-		return NEEDS_RETRY;
-	} else {
-		/*
-		 * no more retries - report this one back to upper level.
+	if (scsi_cmd_retry_allowed(scmd)) {
+		/* but scsi_noretry_cmd() cannot override the
+		 * expecting_media_change flag.
 		 */
+		if (!scsi_noretry_cmd(scmd) ||
+		    scmd->device->expecting_media_change) {
+			scmd->device->expecting_media_change = 0;
+			return NEEDS_RETRY;
+		}
+
+		/* Not marked fail fast, or marked but not expected.
+		 * Clear the flag too because it's meant for the
+		 * next UA only.
+		 */
+		scmd->device->expecting_media_change = 0;
 		return SUCCESS;
 	}
+
+	/*
+	 * no more retries - report this one back to upper level.
+	 */
+	return SUCCESS;
 }
 
 static void eh_lock_door_done(struct request *req, blk_status_t status)
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 05c7c320ef32..926b42ce1dc4 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -171,6 +171,7 @@  struct scsi_device {
 				 * this device */
 	unsigned expecting_cc_ua:1; /* Expecting a CHECK_CONDITION/UNIT_ATTN
 				     * because we did a bus reset. */
+	unsigned expecting_media_change:1; /* Expecting "media changed" UA */
 	unsigned use_10_for_rw:1; /* first try 10-byte read / write */
 	unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
 	unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */