diff mbox series

[v10,07/33] scsi: sd: Have scsi-ml retry read_capacity_16 errors

Message ID 20230714213419.95492-8-michael.christie@oracle.com (mailing list archive)
State Changes Requested
Headers show
Series scsi: Allow scsi_execute users to control retries | expand

Commit Message

Mike Christie July 14, 2023, 9:33 p.m. UTC
This has read_capacity_16 have scsi-ml retry errors instead of driving
them itself.

There are 2 behavior changes with this patch:
1. There is one behavior change where we no longer retry when
scsi_execute_cmd returns < 0, but we should be ok. We don't need to retry
for failures like the queue being removed, and for the case where there
are no tags/reqs since the block layer waits/retries for us. For possible
memory allocation failures from blk_rq_map_kern we use GFP_NOIO, so
retrying will probably not help.
2. For the specific UAs we checked for and retried, we would get
READ_CAPACITY_RETRIES_ON_RESET retries plus whatever retries were left
from the loop's retries. Each UA now gets READ_CAPACITY_RETRIES_ON_RESET
reties, and the other errors (not including medium not present) get up
to 3 retries.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/scsi/sd.c | 106 ++++++++++++++++++++++++++++++----------------
 1 file changed, 69 insertions(+), 37 deletions(-)

Comments

John Garry July 17, 2023, 3:43 p.m. UTC | #1
On 14/07/2023 22:33, Mike Christie wrote:
> This has read_capacity_16 have scsi-ml retry errors instead of driving
> them itself.
> 
> There are 2 behavior changes with this patch:
> 1. There is one behavior change where we no longer retry when
> scsi_execute_cmd returns < 0, but we should be ok. We don't need to retry
> for failures like the queue being removed, and for the case where there
> are no tags/reqs since the block layer waits/retries for us. For possible
> memory allocation failures from blk_rq_map_kern we use GFP_NOIO, so
> retrying will probably not help.
> 2. For the specific UAs we checked for and retried, we would get
> READ_CAPACITY_RETRIES_ON_RESET retries plus whatever retries were left
> from the loop's retries. Each UA now gets READ_CAPACITY_RETRIES_ON_RESET
> reties, and the other errors (not including medium not present) get up
> to 3 retries.
> 
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>

Regardless of a couple of nitpicks, below:

Reviewed-by: John Garry <john.g.garry@oracle.com>

> ---
>   drivers/scsi/sd.c | 106 ++++++++++++++++++++++++++++++----------------
>   1 file changed, 69 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 755b09beff2a..245419fe9358 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -2402,55 +2402,87 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
>   static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
>   						unsigned char *buffer)
>   {
> -	unsigned char cmd[16];
> +	static const u8 cmd[16] = { SERVICE_ACTION_IN_16, SAI_READ_CAPACITY_16,
> +				    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, RC16_LEN };

In terms of coding style, could you follow previous style, like:
	static const u8 cmd[16] = {
		[0] = SERVICE_ACTION_IN_16,
		[1] = SAI_READ_CAPACITY_16,
		[13] = RC16_LEN,
	};
	
Seems safe to me (to ensure we fill in the correct array element).

>   	struct scsi_sense_hdr sshdr;
> -	const struct scsi_exec_args exec_args = {
> -		.sshdr = &sshdr,
> -	};
>   	int sense_valid = 0;
>   	int the_result;
> -	int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET;
>   	unsigned int alignment;
>   	unsigned long long lba;
>   	unsigned sector_size;
> +	struct scsi_failure failures[] = {
> +		/*
> +		 * Fail immediately for Invalid Command Operation Code or
> +		 * Invalid Field in CDB.
> +		 */
> +		{
> +			.sense = ILLEGAL_REQUEST,
> +			.asc = 0x20,
> +			.allowed = 0,
> +			.result = SAM_STAT_CHECK_CONDITION,
> +		},
> +		{
> +			.sense = ILLEGAL_REQUEST,
> +			.asc = 0x24,
> +			.allowed = 0,
> +			.result = SAM_STAT_CHECK_CONDITION,
> +		},
> +		/* Fail immediately for Medium Not Present */
> +		{
> +			.sense = UNIT_ATTENTION,
> +			.asc = 0x3A,
> +			.allowed = 0,
> +			.result = SAM_STAT_CHECK_CONDITION,
> +		},
> +		{
> +			.sense = NOT_READY,
> +			.asc = 0x3A,
> +			.ascq = 0x0,
> +			.allowed = 0,
> +			.result = SAM_STAT_CHECK_CONDITION,
> +		},
> +		{
> +			.sense = UNIT_ATTENTION,
> +			.asc = 0x29,
> +			/* Device reset might occur several times */
> +			.allowed = READ_CAPACITY_RETRIES_ON_RESET,
> +			.result = SAM_STAT_CHECK_CONDITION,
> +		},
> +		/* Any other error not listed above retry */
> +		{
> +			.result = SCMD_FAILURE_RESULT_ANY,
> +			.allowed = 3,
> +		},
> +		{}
> +	};
> +	const struct scsi_exec_args exec_args = {
> +		.sshdr = &sshdr,
> +		.failures = failures,
> +	};
>   
>   	if (sdp->no_read_capacity_16)
>   		return -EINVAL;
>   
> -	do {
> -		memset(cmd, 0, 16);
> -		cmd[0] = SERVICE_ACTION_IN_16;
> -		cmd[1] = SAI_READ_CAPACITY_16;
> -		cmd[13] = RC16_LEN;
> -		memset(buffer, 0, RC16_LEN);
> -
> -		the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN,
> -					      buffer, RC16_LEN, SD_TIMEOUT,
> -					      sdkp->max_retries, &exec_args);
> -		if (the_result > 0) {
> -			if (media_not_present(sdkp, &sshdr))
> -				return -ENODEV;
> +	memset(buffer, 0, RC16_LEN);
>   
> -			sense_valid = scsi_sense_valid(&sshdr);
> -			if (sense_valid &&
> -			    sshdr.sense_key == ILLEGAL_REQUEST &&
> -			    (sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
> -			    sshdr.ascq == 0x00)
> -				/* Invalid Command Operation Code or
> -				 * Invalid Field in CDB, just retry
> -				 * silently with RC10 */
> -				return -EINVAL;
> -			if (sense_valid &&
> -			    sshdr.sense_key == UNIT_ATTENTION &&
> -			    sshdr.asc == 0x29 && sshdr.ascq == 0x00)
> -				/* Device reset might occur several times,
> -				 * give it one more chance */
> -				if (--reset_retries > 0)
> -					continue;
> -		}
> -		retries--;
> +	the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer,
> +				      RC16_LEN, SD_TIMEOUT, sdkp->max_retries,
> +				      &exec_args);
>   
> -	} while (the_result && retries);
> +	if (the_result > 0) {
> +		if (media_not_present(sdkp, &sshdr))
> +			return -ENODEV;
> +
> +		sense_valid = scsi_sense_valid(&sshdr);
> +		if (sense_valid && sshdr.sense_key == ILLEGAL_REQUEST &&
> +		    (sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
> +		     sshdr.ascq == 0x00)
> +			/*
> +			 * Invalid Command Operation Code or Invalid Field in
> +			 * CDB, just retry silently with RC10
> +			 */
> +			return -EINVAL;


nit: personally I would use {} here, like below, but that's your choice.


  (sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
		     sshdr.ascq == 0x00) {
			/*
			 * Invalid Command Operation Code or Invalid Field in
			 * CDB, just retry silently with RC10
			 */
			return -EINVAL;
}

> +	}
>   
>   	if (the_result) {
>   		sd_print_result(sdkp, "Read Capacity(16) failed", the_result);
Mike Christie July 17, 2023, 6:37 p.m. UTC | #2
On 7/17/23 10:43 AM, John Garry wrote:
>>
>> -    unsigned char cmd[16];
>> +    static const u8 cmd[16] = { SERVICE_ACTION_IN_16, SAI_READ_CAPACITY_16,
>> +                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, RC16_LEN };
> 
> In terms of coding style, could you follow previous style, like:
>     static const u8 cmd[16] = {
>         [0] = SERVICE_ACTION_IN_16,
>         [1] = SAI_READ_CAPACITY_16,
>         [13] = RC16_LEN,
>     };
>     
> Seems safe to me (to ensure we fill in the correct array element).
> 

That's nice. Will fix throughout the patchset.

>> +
>> +        sense_valid = scsi_sense_valid(&sshdr);
>> +        if (sense_valid && sshdr.sense_key == ILLEGAL_REQUEST &&
>> +            (sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
>> +             sshdr.ascq == 0x00)
>> +            /*
>> +             * Invalid Command Operation Code or Invalid Field in
>> +             * CDB, just retry silently with RC10
>> +             */
>> +            return -EINVAL;
> 
> 
> nit: personally I would use {} here, like below, but that's your choice.
> 

Yeah, I like the {} when there's comments. I've got the not needed comment
in other patches so I just kept the previous style.
Bart Van Assche July 19, 2023, 6:38 p.m. UTC | #3
On 7/14/23 14:33, Mike Christie wrote:
> This has read_capacity_16 have scsi-ml retry errors instead of driving
> them itself.

Assuming John's comments will be addressed:

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
diff mbox series

Patch

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 755b09beff2a..245419fe9358 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2402,55 +2402,87 @@  static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
 						unsigned char *buffer)
 {
-	unsigned char cmd[16];
+	static const u8 cmd[16] = { SERVICE_ACTION_IN_16, SAI_READ_CAPACITY_16,
+				    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, RC16_LEN };
 	struct scsi_sense_hdr sshdr;
-	const struct scsi_exec_args exec_args = {
-		.sshdr = &sshdr,
-	};
 	int sense_valid = 0;
 	int the_result;
-	int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET;
 	unsigned int alignment;
 	unsigned long long lba;
 	unsigned sector_size;
+	struct scsi_failure failures[] = {
+		/*
+		 * Fail immediately for Invalid Command Operation Code or
+		 * Invalid Field in CDB.
+		 */
+		{
+			.sense = ILLEGAL_REQUEST,
+			.asc = 0x20,
+			.allowed = 0,
+			.result = SAM_STAT_CHECK_CONDITION,
+		},
+		{
+			.sense = ILLEGAL_REQUEST,
+			.asc = 0x24,
+			.allowed = 0,
+			.result = SAM_STAT_CHECK_CONDITION,
+		},
+		/* Fail immediately for Medium Not Present */
+		{
+			.sense = UNIT_ATTENTION,
+			.asc = 0x3A,
+			.allowed = 0,
+			.result = SAM_STAT_CHECK_CONDITION,
+		},
+		{
+			.sense = NOT_READY,
+			.asc = 0x3A,
+			.ascq = 0x0,
+			.allowed = 0,
+			.result = SAM_STAT_CHECK_CONDITION,
+		},
+		{
+			.sense = UNIT_ATTENTION,
+			.asc = 0x29,
+			/* Device reset might occur several times */
+			.allowed = READ_CAPACITY_RETRIES_ON_RESET,
+			.result = SAM_STAT_CHECK_CONDITION,
+		},
+		/* Any other error not listed above retry */
+		{
+			.result = SCMD_FAILURE_RESULT_ANY,
+			.allowed = 3,
+		},
+		{}
+	};
+	const struct scsi_exec_args exec_args = {
+		.sshdr = &sshdr,
+		.failures = failures,
+	};
 
 	if (sdp->no_read_capacity_16)
 		return -EINVAL;
 
-	do {
-		memset(cmd, 0, 16);
-		cmd[0] = SERVICE_ACTION_IN_16;
-		cmd[1] = SAI_READ_CAPACITY_16;
-		cmd[13] = RC16_LEN;
-		memset(buffer, 0, RC16_LEN);
-
-		the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN,
-					      buffer, RC16_LEN, SD_TIMEOUT,
-					      sdkp->max_retries, &exec_args);
-		if (the_result > 0) {
-			if (media_not_present(sdkp, &sshdr))
-				return -ENODEV;
+	memset(buffer, 0, RC16_LEN);
 
-			sense_valid = scsi_sense_valid(&sshdr);
-			if (sense_valid &&
-			    sshdr.sense_key == ILLEGAL_REQUEST &&
-			    (sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
-			    sshdr.ascq == 0x00)
-				/* Invalid Command Operation Code or
-				 * Invalid Field in CDB, just retry
-				 * silently with RC10 */
-				return -EINVAL;
-			if (sense_valid &&
-			    sshdr.sense_key == UNIT_ATTENTION &&
-			    sshdr.asc == 0x29 && sshdr.ascq == 0x00)
-				/* Device reset might occur several times,
-				 * give it one more chance */
-				if (--reset_retries > 0)
-					continue;
-		}
-		retries--;
+	the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer,
+				      RC16_LEN, SD_TIMEOUT, sdkp->max_retries,
+				      &exec_args);
 
-	} while (the_result && retries);
+	if (the_result > 0) {
+		if (media_not_present(sdkp, &sshdr))
+			return -ENODEV;
+
+		sense_valid = scsi_sense_valid(&sshdr);
+		if (sense_valid && sshdr.sense_key == ILLEGAL_REQUEST &&
+		    (sshdr.asc == 0x20 || sshdr.asc == 0x24) &&
+		     sshdr.ascq == 0x00)
+			/*
+			 * Invalid Command Operation Code or Invalid Field in
+			 * CDB, just retry silently with RC10
+			 */
+			return -EINVAL;
+	}
 
 	if (the_result) {
 		sd_print_result(sdkp, "Read Capacity(16) failed", the_result);