diff mbox series

[v11,10/34] scsi: Have scsi-ml retry sd_spinup_disk errors

Message ID 20230905231547.83945-11-michael.christie@oracle.com (mailing list archive)
State Changes Requested
Headers show
Series [v11,01/34] scsi: Add helper to prep sense during error handling | expand

Commit Message

Mike Christie Sept. 5, 2023, 11:15 p.m. UTC
This simplifies sd_spinup_disk so scsi-ml retries errors for it. Note that
we retried specifically on a UA and also if scsi_status_is_good returned
failed which would happen for all check conditions. In this patch we use
SCMD_FAILURE_STAT_ANY which will trigger for the same conditions as
when scsi_status_is_good returns false. This will cover all CCs including
UAs so there is no explicit failures arrary entry for UAs.

We do not handle the outside loop's retries because we want to sleep
between tries and we don't support that yet.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/sd.c | 73 ++++++++++++++++++++++++++---------------------
 1 file changed, 41 insertions(+), 32 deletions(-)

Comments

Martin Wilck Sept. 15, 2023, 8:46 p.m. UTC | #1
On Tue, 2023-09-05 at 18:15 -0500, Mike Christie wrote:
> This simplifies sd_spinup_disk so scsi-ml retries errors for it. Note
> that
> we retried specifically on a UA and also if scsi_status_is_good
> returned
> failed which would happen for all check conditions. In this patch we
> use
> SCMD_FAILURE_STAT_ANY which will trigger for the same conditions as
> when scsi_status_is_good returns false. This will cover all CCs
> including
> UAs so there is no explicit failures arrary entry for UAs.
> 
> We do not handle the outside loop's retries because we want to sleep
> between tries and we don't support that yet.
> 
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/sd.c | 73 ++++++++++++++++++++++++++-------------------
> --
>  1 file changed, 41 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 74967e1b19da..f5e6b5cc762f 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -2151,55 +2151,64 @@ static int sd_done(struct scsi_cmnd *SCpnt)
>  static void
>  sd_spinup_disk(struct scsi_disk *sdkp)
>  {
> -       unsigned char cmd[10];
> +       static const u8 cmd[10] = { TEST_UNIT_READY };
>         unsigned long spintime_expire = 0;
> -       int retries, spintime;
> +       int spintime, sense_valid = 0;
>         unsigned int the_result;
>         struct scsi_sense_hdr sshdr;
> +       struct scsi_failure failures[] = {
> +               /* Fail immediately for Medium Not Present */
> +               {
> +                       .sense = UNIT_ATTENTION,
> +                       .asc = 0x3A,

Shouldn't you set .ascq = SCMD_FAILURE_ASCQ_ANY here, and below as
well?

> +                       .allowed = 0,
> +                       .result = SAM_STAT_CHECK_CONDITION,
> +               },
> +               {
> +                       .sense = NOT_READY,
> +                       .asc = 0x3A,
> +                       .allowed = 0,
> +                       .result = SAM_STAT_CHECK_CONDITION,
> +               },
> +               {
> +                       .result = SCMD_FAILURE_STAT_ANY,
> +                       .allowed = 3,
> +               },
> +               {}
> +       };
>         const struct scsi_exec_args exec_args = {
>                 .sshdr = &sshdr,
> +               .failures = failures,
>         };
> -       int sense_valid = 0;
>  
>         spintime = 0;
>  
>         /* Spin up drives, as required.  Only do this at boot time */
>         /* Spinup needs to be done for module loads too. */
>         do {
> -               retries = 0;
> -
> -               do {
> -                       bool media_was_present = sdkp->media_present;
> +               bool media_was_present = sdkp->media_present;
>  
> -                       cmd[0] = TEST_UNIT_READY;
> -                       memset((void *) &cmd[1], 0, 9);
> +               scsi_reset_failures(failures);
>  
> -                       the_result = scsi_execute_cmd(sdkp->device,
> cmd,
> -                                                     REQ_OP_DRV_IN,
> NULL, 0,
> -                                                     SD_TIMEOUT,
> -                                                     sdkp-
> >max_retries,
> -                                                     &exec_args);
> +               the_result = scsi_execute_cmd(sdkp->device, cmd,
> REQ_OP_DRV_IN,
> +                                             NULL, 0, SD_TIMEOUT,
> +                                             sdkp->max_retries,
> &exec_args);
>  
> -                       if (the_result > 0) {
> -                               /*
> -                                * If the drive has indicated to us
> that it
> -                                * doesn't have any media in it,
> don't bother
> -                                * with any more polling.
> -                                */
> -                               if (media_not_present(sdkp, &sshdr))
> {
> -                                       if (media_was_present)
> -
>                                                sd_printk(KERN_NOTICE,
> sdkp,
> -                                                         "Media
> removed, stopped polling\n");
> -                                       return;
> -                               }
>  
> -                               sense_valid =
> scsi_sense_valid(&sshdr);
> +               if (the_result > 0) {
> +                       /*
> +                        * If the drive has indicated to us that it
> doesn't
> +                        * have any media in it, don't bother with
> any more
> +                        * polling.
> +                        */
> +                       if (media_not_present(sdkp, &sshdr)) {
> +                               if (media_was_present)
> +                                       sd_printk(KERN_NOTICE, sdkp,
> +                                                 "Media removed,
> stopped polling\n");
> +                               return;
>                         }
> -                       retries++;
> -               } while (retries < 3 &&
> -                        (!scsi_status_is_good(the_result) ||
> -                         (scsi_status_is_check_condition(the_result)
> &&
> -                         sense_valid && sshdr.sense_key ==
> UNIT_ATTENTION)));
> +                       sense_valid = scsi_sense_valid(&sshdr);
> +               }
>  
>                 if (!scsi_status_is_check_condition(the_result)) {
>                         /* no sense, TUR either succeeded or failed
Mike Christie Sept. 15, 2023, 8:58 p.m. UTC | #2
On 9/15/23 3:46 PM, Martin Wilck wrote:
>>  sd_spinup_disk(struct scsi_disk *sdkp)
>>  {
>> -       unsigned char cmd[10];
>> +       static const u8 cmd[10] = { TEST_UNIT_READY };
>>         unsigned long spintime_expire = 0;
>> -       int retries, spintime;
>> +       int spintime, sense_valid = 0;
>>         unsigned int the_result;
>>         struct scsi_sense_hdr sshdr;
>> +       struct scsi_failure failures[] = {
>> +               /* Fail immediately for Medium Not Present */
>> +               {
>> +                       .sense = UNIT_ATTENTION,
>> +                       .asc = 0x3A,
> Shouldn't you set .ascq = SCMD_FAILURE_ASCQ_ANY here, and below as
> well?

You're right. Will fix all those cases.
Martin Wilck Sept. 15, 2023, 9:23 p.m. UTC | #3
On Fri, 2023-09-15 at 15:58 -0500, Mike Christie wrote:
> On 9/15/23 3:46 PM, Martin Wilck wrote:
> > >  sd_spinup_disk(struct scsi_disk *sdkp)
> > >  {
> > > -       unsigned char cmd[10];
> > > +       static const u8 cmd[10] = { TEST_UNIT_READY };
> > >         unsigned long spintime_expire = 0;
> > > -       int retries, spintime;
> > > +       int spintime, sense_valid = 0;
> > >         unsigned int the_result;
> > >         struct scsi_sense_hdr sshdr;
> > > +       struct scsi_failure failures[] = {
> > > +               /* Fail immediately for Medium Not Present */
> > > +               {
> > > +                       .sense = UNIT_ATTENTION,
> > > +                       .asc = 0x3A,
> > Shouldn't you set .ascq = SCMD_FAILURE_ASCQ_ANY here, and below as
> > well?
> 
> You're right. Will fix all those cases.

I also noted that you don't treat .ascq = 0 consistently, e.g. in
07/34, where you set it for the NOT_READY case but not for others. It's
not wrong to omit it, but for code clarity it might be good to set it
explicitly.

Thanks,
Martin
diff mbox series

Patch

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 74967e1b19da..f5e6b5cc762f 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2151,55 +2151,64 @@  static int sd_done(struct scsi_cmnd *SCpnt)
 static void
 sd_spinup_disk(struct scsi_disk *sdkp)
 {
-	unsigned char cmd[10];
+	static const u8 cmd[10] = { TEST_UNIT_READY };
 	unsigned long spintime_expire = 0;
-	int retries, spintime;
+	int spintime, sense_valid = 0;
 	unsigned int the_result;
 	struct scsi_sense_hdr sshdr;
+	struct scsi_failure failures[] = {
+		/* Fail immediately for Medium Not Present */
+		{
+			.sense = UNIT_ATTENTION,
+			.asc = 0x3A,
+			.allowed = 0,
+			.result = SAM_STAT_CHECK_CONDITION,
+		},
+		{
+			.sense = NOT_READY,
+			.asc = 0x3A,
+			.allowed = 0,
+			.result = SAM_STAT_CHECK_CONDITION,
+		},
+		{
+			.result = SCMD_FAILURE_STAT_ANY,
+			.allowed = 3,
+		},
+		{}
+	};
 	const struct scsi_exec_args exec_args = {
 		.sshdr = &sshdr,
+		.failures = failures,
 	};
-	int sense_valid = 0;
 
 	spintime = 0;
 
 	/* Spin up drives, as required.  Only do this at boot time */
 	/* Spinup needs to be done for module loads too. */
 	do {
-		retries = 0;
-
-		do {
-			bool media_was_present = sdkp->media_present;
+		bool media_was_present = sdkp->media_present;
 
-			cmd[0] = TEST_UNIT_READY;
-			memset((void *) &cmd[1], 0, 9);
+		scsi_reset_failures(failures);
 
-			the_result = scsi_execute_cmd(sdkp->device, cmd,
-						      REQ_OP_DRV_IN, NULL, 0,
-						      SD_TIMEOUT,
-						      sdkp->max_retries,
-						      &exec_args);
+		the_result = scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN,
+					      NULL, 0, SD_TIMEOUT,
+					      sdkp->max_retries, &exec_args);
 
-			if (the_result > 0) {
-				/*
-				 * If the drive has indicated to us that it
-				 * doesn't have any media in it, don't bother
-				 * with any more polling.
-				 */
-				if (media_not_present(sdkp, &sshdr)) {
-					if (media_was_present)
-						sd_printk(KERN_NOTICE, sdkp,
-							  "Media removed, stopped polling\n");
-					return;
-				}
 
-				sense_valid = scsi_sense_valid(&sshdr);
+		if (the_result > 0) {
+			/*
+			 * If the drive has indicated to us that it doesn't
+			 * have any media in it, don't bother with any more
+			 * polling.
+			 */
+			if (media_not_present(sdkp, &sshdr)) {
+				if (media_was_present)
+					sd_printk(KERN_NOTICE, sdkp,
+						  "Media removed, stopped polling\n");
+				return;
 			}
-			retries++;
-		} while (retries < 3 &&
-			 (!scsi_status_is_good(the_result) ||
-			  (scsi_status_is_check_condition(the_result) &&
-			  sense_valid && sshdr.sense_key == UNIT_ATTENTION)));
+			sense_valid = scsi_sense_valid(&sshdr);
+		}
 
 		if (!scsi_status_is_check_condition(the_result)) {
 			/* no sense, TUR either succeeded or failed