Message ID | 20230714213419.95492-8-michael.christie@oracle.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | scsi: Allow scsi_execute users to control retries | expand |
On 14/07/2023 22:33, Mike Christie wrote: > This has read_capacity_16 have scsi-ml retry errors instead of driving > them itself. > > There are 2 behavior changes with this patch: > 1. There is one behavior change where we no longer retry when > scsi_execute_cmd returns < 0, but we should be ok. We don't need to retry > for failures like the queue being removed, and for the case where there > are no tags/reqs since the block layer waits/retries for us. For possible > memory allocation failures from blk_rq_map_kern we use GFP_NOIO, so > retrying will probably not help. > 2. For the specific UAs we checked for and retried, we would get > READ_CAPACITY_RETRIES_ON_RESET retries plus whatever retries were left > from the loop's retries. Each UA now gets READ_CAPACITY_RETRIES_ON_RESET > reties, and the other errors (not including medium not present) get up > to 3 retries. > > Signed-off-by: Mike Christie <michael.christie@oracle.com> > Reviewed-by: Christoph Hellwig <hch@lst.de> Regardless of a couple of nitpicks, below: Reviewed-by: John Garry <john.g.garry@oracle.com> > --- > drivers/scsi/sd.c | 106 ++++++++++++++++++++++++++++++---------------- > 1 file changed, 69 insertions(+), 37 deletions(-) > > diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c > index 755b09beff2a..245419fe9358 100644 > --- a/drivers/scsi/sd.c > +++ b/drivers/scsi/sd.c > @@ -2402,55 +2402,87 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, > static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, > unsigned char *buffer) > { > - unsigned char cmd[16]; > + static const u8 cmd[16] = { SERVICE_ACTION_IN_16, SAI_READ_CAPACITY_16, > + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, RC16_LEN }; In terms of coding style, could you follow previous style, like: static const u8 cmd[16] = { [0] = SERVICE_ACTION_IN_16, [1] = SAI_READ_CAPACITY_16, [13] = RC16_LEN, }; Seems safe to me (to ensure we fill in the correct array element). > struct scsi_sense_hdr sshdr; > - const struct scsi_exec_args exec_args = { > - .sshdr = &sshdr, > - }; > int sense_valid = 0; > int the_result; > - int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; > unsigned int alignment; > unsigned long long lba; > unsigned sector_size; > + struct scsi_failure failures[] = { > + /* > + * Fail immediately for Invalid Command Operation Code or > + * Invalid Field in CDB. > + */ > + { > + .sense = ILLEGAL_REQUEST, > + .asc = 0x20, > + .allowed = 0, > + .result = SAM_STAT_CHECK_CONDITION, > + }, > + { > + .sense = ILLEGAL_REQUEST, > + .asc = 0x24, > + .allowed = 0, > + .result = SAM_STAT_CHECK_CONDITION, > + }, > + /* Fail immediately for Medium Not Present */ > + { > + .sense = UNIT_ATTENTION, > + .asc = 0x3A, > + .allowed = 0, > + .result = SAM_STAT_CHECK_CONDITION, > + }, > + { > + .sense = NOT_READY, > + .asc = 0x3A, > + .ascq = 0x0, > + .allowed = 0, > + .result = SAM_STAT_CHECK_CONDITION, > + }, > + { > + .sense = UNIT_ATTENTION, > + .asc = 0x29, > + /* Device reset might occur several times */ > + .allowed = READ_CAPACITY_RETRIES_ON_RESET, > + .result = SAM_STAT_CHECK_CONDITION, > + }, > + /* Any other error not listed above retry */ > + { > + .result = SCMD_FAILURE_RESULT_ANY, > + .allowed = 3, > + }, > + {} > + }; > + const struct scsi_exec_args exec_args = { > + .sshdr = &sshdr, > + .failures = failures, > + }; > > if (sdp->no_read_capacity_16) > return -EINVAL; > > - do { > - memset(cmd, 0, 16); > - cmd[0] = SERVICE_ACTION_IN_16; > - cmd[1] = SAI_READ_CAPACITY_16; > - cmd[13] = RC16_LEN; > - memset(buffer, 0, RC16_LEN); > - > - the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, > - buffer, RC16_LEN, SD_TIMEOUT, > - sdkp->max_retries, &exec_args); > - if (the_result > 0) { > - if (media_not_present(sdkp, &sshdr)) > - return -ENODEV; > + memset(buffer, 0, RC16_LEN); > > - sense_valid = scsi_sense_valid(&sshdr); > - if (sense_valid && > - sshdr.sense_key == ILLEGAL_REQUEST && > - (sshdr.asc == 0x20 || sshdr.asc == 0x24) && > - sshdr.ascq == 0x00) > - /* Invalid Command Operation Code or > - * Invalid Field in CDB, just retry > - * silently with RC10 */ > - return -EINVAL; > - if (sense_valid && > - sshdr.sense_key == UNIT_ATTENTION && > - sshdr.asc == 0x29 && sshdr.ascq == 0x00) > - /* Device reset might occur several times, > - * give it one more chance */ > - if (--reset_retries > 0) > - continue; > - } > - retries--; > + the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, > + RC16_LEN, SD_TIMEOUT, sdkp->max_retries, > + &exec_args); > > - } while (the_result && retries); > + if (the_result > 0) { > + if (media_not_present(sdkp, &sshdr)) > + return -ENODEV; > + > + sense_valid = scsi_sense_valid(&sshdr); > + if (sense_valid && sshdr.sense_key == ILLEGAL_REQUEST && > + (sshdr.asc == 0x20 || sshdr.asc == 0x24) && > + sshdr.ascq == 0x00) > + /* > + * Invalid Command Operation Code or Invalid Field in > + * CDB, just retry silently with RC10 > + */ > + return -EINVAL; nit: personally I would use {} here, like below, but that's your choice. (sshdr.asc == 0x20 || sshdr.asc == 0x24) && sshdr.ascq == 0x00) { /* * Invalid Command Operation Code or Invalid Field in * CDB, just retry silently with RC10 */ return -EINVAL; } > + } > > if (the_result) { > sd_print_result(sdkp, "Read Capacity(16) failed", the_result);
On 7/17/23 10:43 AM, John Garry wrote: >> >> - unsigned char cmd[16]; >> + static const u8 cmd[16] = { SERVICE_ACTION_IN_16, SAI_READ_CAPACITY_16, >> + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, RC16_LEN }; > > In terms of coding style, could you follow previous style, like: > static const u8 cmd[16] = { > [0] = SERVICE_ACTION_IN_16, > [1] = SAI_READ_CAPACITY_16, > [13] = RC16_LEN, > }; > > Seems safe to me (to ensure we fill in the correct array element). > That's nice. Will fix throughout the patchset. >> + >> + sense_valid = scsi_sense_valid(&sshdr); >> + if (sense_valid && sshdr.sense_key == ILLEGAL_REQUEST && >> + (sshdr.asc == 0x20 || sshdr.asc == 0x24) && >> + sshdr.ascq == 0x00) >> + /* >> + * Invalid Command Operation Code or Invalid Field in >> + * CDB, just retry silently with RC10 >> + */ >> + return -EINVAL; > > > nit: personally I would use {} here, like below, but that's your choice. > Yeah, I like the {} when there's comments. I've got the not needed comment in other patches so I just kept the previous style.
On 7/14/23 14:33, Mike Christie wrote: > This has read_capacity_16 have scsi-ml retry errors instead of driving > them itself. Assuming John's comments will be addressed: Reviewed-by: Bart Van Assche <bvanassche@acm.org>
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 755b09beff2a..245419fe9358 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2402,55 +2402,87 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp, static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, unsigned char *buffer) { - unsigned char cmd[16]; + static const u8 cmd[16] = { SERVICE_ACTION_IN_16, SAI_READ_CAPACITY_16, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, RC16_LEN }; struct scsi_sense_hdr sshdr; - const struct scsi_exec_args exec_args = { - .sshdr = &sshdr, - }; int sense_valid = 0; int the_result; - int retries = 3, reset_retries = READ_CAPACITY_RETRIES_ON_RESET; unsigned int alignment; unsigned long long lba; unsigned sector_size; + struct scsi_failure failures[] = { + /* + * Fail immediately for Invalid Command Operation Code or + * Invalid Field in CDB. + */ + { + .sense = ILLEGAL_REQUEST, + .asc = 0x20, + .allowed = 0, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = ILLEGAL_REQUEST, + .asc = 0x24, + .allowed = 0, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Fail immediately for Medium Not Present */ + { + .sense = UNIT_ATTENTION, + .asc = 0x3A, + .allowed = 0, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = NOT_READY, + .asc = 0x3A, + .ascq = 0x0, + .allowed = 0, + .result = SAM_STAT_CHECK_CONDITION, + }, + { + .sense = UNIT_ATTENTION, + .asc = 0x29, + /* Device reset might occur several times */ + .allowed = READ_CAPACITY_RETRIES_ON_RESET, + .result = SAM_STAT_CHECK_CONDITION, + }, + /* Any other error not listed above retry */ + { + .result = SCMD_FAILURE_RESULT_ANY, + .allowed = 3, + }, + {} + }; + const struct scsi_exec_args exec_args = { + .sshdr = &sshdr, + .failures = failures, + }; if (sdp->no_read_capacity_16) return -EINVAL; - do { - memset(cmd, 0, 16); - cmd[0] = SERVICE_ACTION_IN_16; - cmd[1] = SAI_READ_CAPACITY_16; - cmd[13] = RC16_LEN; - memset(buffer, 0, RC16_LEN); - - the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, - buffer, RC16_LEN, SD_TIMEOUT, - sdkp->max_retries, &exec_args); - if (the_result > 0) { - if (media_not_present(sdkp, &sshdr)) - return -ENODEV; + memset(buffer, 0, RC16_LEN); - sense_valid = scsi_sense_valid(&sshdr); - if (sense_valid && - sshdr.sense_key == ILLEGAL_REQUEST && - (sshdr.asc == 0x20 || sshdr.asc == 0x24) && - sshdr.ascq == 0x00) - /* Invalid Command Operation Code or - * Invalid Field in CDB, just retry - * silently with RC10 */ - return -EINVAL; - if (sense_valid && - sshdr.sense_key == UNIT_ATTENTION && - sshdr.asc == 0x29 && sshdr.ascq == 0x00) - /* Device reset might occur several times, - * give it one more chance */ - if (--reset_retries > 0) - continue; - } - retries--; + the_result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buffer, + RC16_LEN, SD_TIMEOUT, sdkp->max_retries, + &exec_args); - } while (the_result && retries); + if (the_result > 0) { + if (media_not_present(sdkp, &sshdr)) + return -ENODEV; + + sense_valid = scsi_sense_valid(&sshdr); + if (sense_valid && sshdr.sense_key == ILLEGAL_REQUEST && + (sshdr.asc == 0x20 || sshdr.asc == 0x24) && + sshdr.ascq == 0x00) + /* + * Invalid Command Operation Code or Invalid Field in + * CDB, just retry silently with RC10 + */ + return -EINVAL; + } if (the_result) { sd_print_result(sdkp, "Read Capacity(16) failed", the_result);