Message ID | 20180806045115.7725-5-dgilbert@interlog.com (mailing list archive) |
---|---|
State | Deferred |
Headers | show |
Series | sd: init_command() and sd_done() speed-ups | expand |
On 2018/08/06 13:51, Douglas Gilbert wrote: > Re-arrange some logic to lessen the number of checks. With logical > ANDs put the least likely first, with logical ORs put the most > likely first. Also add conditional hints on the assumed fastpath. > > Signed-off-by: Douglas Gilbert <dgilbert@interlog.com> > --- > drivers/scsi/sd.c | 43 ++++++++++++++++++++++++------------------- > 1 file changed, 24 insertions(+), 19 deletions(-) > > diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c > index 9f047fd3c92d..05014054e357 100644 > --- a/drivers/scsi/sd.c > +++ b/drivers/scsi/sd.c > @@ -1171,9 +1171,9 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) > > fua = (rq->cmd_flags & REQ_FUA) ? 0x8 : 0; > dix = scsi_prot_sg_count(cmd); > - dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type); > + dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); > > - if (write && dix) > + if (dix && write) > sd_dif_prepare(cmd); > > if (dif || dix) > @@ -1181,19 +1181,27 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) > else > protect = 0; > > - if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { > + if (unlikely(protect && > + sdkp->protection_type == T10_PI_TYPE2_PROTECTION)) > ret = sd_setup_read_write_32_cmnd(cmd, write, lba, nr_blocks, > protect | fua); > - } else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) { > + else if (sdp->use_16_for_rw) > ret = sd_setup_read_write_16_cmnd(cmd, write, lba, nr_blocks, > protect | fua); So here, without use_16_for_rw being forced on (which is the case for most disks I think, except ZBC disks which mandate it) or most disks, all read/write to low LBAs will have to go through a longer chain of if/else if... Is this change really such a gain in average ? It looks like this will be a loss for the first small partition at the beginning of the disk. > - } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw > - || protect) { > - ret = sd_setup_read_write_10_cmnd(cmd, write, lba, nr_blocks, > - protect | fua); > - } else { > - ret = sd_setup_read_write_6_cmnd(cmd, write, lba, nr_blocks, > - protect | fua); > + else if (likely(nr_blocks < 0x100)) { > + if (sdp->use_10_for_rw || (lba > 0x1fffff) || protect) > + ret = sd_setup_read_write_10_cmnd(cmd, write, lba, > + nr_blocks, protect | fua); > + else > + ret = sd_setup_read_write_6_cmnd(cmd, write, lba, > + nr_blocks, protect | fua); > + } else { /* not already done and nr_blocks > 0xff */ > + if (unlikely(nr_blocks > 0xffff)) > + ret = sd_setup_read_write_16_cmnd(cmd, write, lba, > + nr_blocks, protect | fua); > + else > + ret = sd_setup_read_write_10_cmnd(cmd, write, lba, > + nr_blocks, protect | fua); > } > > if (ret != BLKPREP_OK) > @@ -1976,7 +1984,6 @@ static int sd_done(struct scsi_cmnd *SCpnt) > struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk); > struct request *req = SCpnt->request; > int sense_valid = 0; > - int sense_deferred = 0; > > /* > * Assumption that REQ_OP_READ and REQ_OP_WRITE are 0 and 1 is > @@ -2030,16 +2037,14 @@ static int sd_done(struct scsi_cmnd *SCpnt) > } > } > > - if (result) { > + if (unlikely(result)) { > sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); > - if (sense_valid) > - sense_deferred = scsi_sense_is_deferred(&sshdr); > + if (driver_byte(result) == DRIVER_SENSE || > + (sense_valid && (!scsi_sense_is_deferred(&sshdr)))) > + good_bytes = sd_done_sense(SCpnt, good_bytes, &sshdr); > } > - sdkp->medium_access_timed_out = 0; > > - if (unlikely(driver_byte(result) == DRIVER_SENSE || > - (sense_valid && !sense_deferred))) > - good_bytes = sd_done_sense(SCpnt, good_bytes, &sshdr); > + sdkp->medium_access_timed_out = 0; > > if (sd_is_zoned(sdkp)) > sd_zbc_complete(SCpnt, good_bytes, &sshdr); >
On 2018-08-06 01:41 AM, Damien Le Moal wrote: > On 2018/08/06 13:51, Douglas Gilbert wrote: >> Re-arrange some logic to lessen the number of checks. With logical >> ANDs put the least likely first, with logical ORs put the most >> likely first. Also add conditional hints on the assumed fastpath. >> >> Signed-off-by: Douglas Gilbert <dgilbert@interlog.com> >> --- >> drivers/scsi/sd.c | 43 ++++++++++++++++++++++++------------------- >> 1 file changed, 24 insertions(+), 19 deletions(-) >> >> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c >> index 9f047fd3c92d..05014054e357 100644 >> --- a/drivers/scsi/sd.c >> +++ b/drivers/scsi/sd.c >> @@ -1171,9 +1171,9 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) >> >> fua = (rq->cmd_flags & REQ_FUA) ? 0x8 : 0; >> dix = scsi_prot_sg_count(cmd); >> - dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type); >> + dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); >> >> - if (write && dix) >> + if (dix && write) >> sd_dif_prepare(cmd); >> >> if (dif || dix) >> @@ -1181,19 +1181,27 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) >> else >> protect = 0; >> >> - if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { >> + if (unlikely(protect && >> + sdkp->protection_type == T10_PI_TYPE2_PROTECTION)) >> ret = sd_setup_read_write_32_cmnd(cmd, write, lba, nr_blocks, >> protect | fua); >> - } else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) { >> + else if (sdp->use_16_for_rw) >> ret = sd_setup_read_write_16_cmnd(cmd, write, lba, nr_blocks, >> protect | fua); > > So here, without use_16_for_rw being forced on (which is the case for most disks > I think, except ZBC disks which mandate it) or most disks, all read/write to low > LBAs will have to go through a longer chain of if/else if... Is this change > really such a gain in average ? It looks like this will be a loss for the first > small partition at the beginning of the disk. For comparison I assume: protect = 0 (nr_blocks < 0x100) == 1 (use_16_for_rw || use_10_for_rw) == 1 So for the use_16_for_rw == 0 and lba <= 0x1fffff case I count: old: 6 proposed: 4 comparisons when use_10_for_rw == 1. The old figure could be reduced to 4 by re-ordering the comparisons on its use_10_for_rw "else if" line (i.e. place use_10_for_rw first rather than third). I'm not so concerned about the speed of READ(6) ... no SSD should be using that deprecated command. That said, I think the proposed code is slightly faster because the old code checks that both nr_blocks > 0xffff and nr_blocks > 0xff fail. The proposed code only checks the latter en route to the READ(6). Also I think the proposed small decision tree is slightly easier to follow (and hence maintain) than the existing "flat" if/else chain which is deceptively complex. So more a code clean-up than a speed-up :-) Thanks for the feedback. Doug Gilbert BTW the major code speed improvement is not in this patchset but an earlier patch to convert the opened coded shifts to put_unaligned_be*() calls, IMO. >> - } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw >> - || protect) { >> - ret = sd_setup_read_write_10_cmnd(cmd, write, lba, nr_blocks, >> - protect | fua); >> - } else { >> - ret = sd_setup_read_write_6_cmnd(cmd, write, lba, nr_blocks, >> - protect | fua); >> + else if (likely(nr_blocks < 0x100)) { >> + if (sdp->use_10_for_rw || (lba > 0x1fffff) || protect) >> + ret = sd_setup_read_write_10_cmnd(cmd, write, lba, >> + nr_blocks, protect | fua); >> + else >> + ret = sd_setup_read_write_6_cmnd(cmd, write, lba, >> + nr_blocks, protect | fua); >> + } else { /* not already done and nr_blocks > 0xff */ >> + if (unlikely(nr_blocks > 0xffff)) >> + ret = sd_setup_read_write_16_cmnd(cmd, write, lba, >> + nr_blocks, protect | fua); >> + else >> + ret = sd_setup_read_write_10_cmnd(cmd, write, lba, >> + nr_blocks, protect | fua); >> }
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 9f047fd3c92d..05014054e357 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1171,9 +1171,9 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) fua = (rq->cmd_flags & REQ_FUA) ? 0x8 : 0; dix = scsi_prot_sg_count(cmd); - dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type); + dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type); - if (write && dix) + if (dix && write) sd_dif_prepare(cmd); if (dif || dix) @@ -1181,19 +1181,27 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) else protect = 0; - if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) { + if (unlikely(protect && + sdkp->protection_type == T10_PI_TYPE2_PROTECTION)) ret = sd_setup_read_write_32_cmnd(cmd, write, lba, nr_blocks, protect | fua); - } else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) { + else if (sdp->use_16_for_rw) ret = sd_setup_read_write_16_cmnd(cmd, write, lba, nr_blocks, protect | fua); - } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw - || protect) { - ret = sd_setup_read_write_10_cmnd(cmd, write, lba, nr_blocks, - protect | fua); - } else { - ret = sd_setup_read_write_6_cmnd(cmd, write, lba, nr_blocks, - protect | fua); + else if (likely(nr_blocks < 0x100)) { + if (sdp->use_10_for_rw || (lba > 0x1fffff) || protect) + ret = sd_setup_read_write_10_cmnd(cmd, write, lba, + nr_blocks, protect | fua); + else + ret = sd_setup_read_write_6_cmnd(cmd, write, lba, + nr_blocks, protect | fua); + } else { /* not already done and nr_blocks > 0xff */ + if (unlikely(nr_blocks > 0xffff)) + ret = sd_setup_read_write_16_cmnd(cmd, write, lba, + nr_blocks, protect | fua); + else + ret = sd_setup_read_write_10_cmnd(cmd, write, lba, + nr_blocks, protect | fua); } if (ret != BLKPREP_OK) @@ -1976,7 +1984,6 @@ static int sd_done(struct scsi_cmnd *SCpnt) struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk); struct request *req = SCpnt->request; int sense_valid = 0; - int sense_deferred = 0; /* * Assumption that REQ_OP_READ and REQ_OP_WRITE are 0 and 1 is @@ -2030,16 +2037,14 @@ static int sd_done(struct scsi_cmnd *SCpnt) } } - if (result) { + if (unlikely(result)) { sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr); - if (sense_valid) - sense_deferred = scsi_sense_is_deferred(&sshdr); + if (driver_byte(result) == DRIVER_SENSE || + (sense_valid && (!scsi_sense_is_deferred(&sshdr)))) + good_bytes = sd_done_sense(SCpnt, good_bytes, &sshdr); } - sdkp->medium_access_timed_out = 0; - if (unlikely(driver_byte(result) == DRIVER_SENSE || - (sense_valid && !sense_deferred))) - good_bytes = sd_done_sense(SCpnt, good_bytes, &sshdr); + sdkp->medium_access_timed_out = 0; if (sd_is_zoned(sdkp)) sd_zbc_complete(SCpnt, good_bytes, &sshdr);
Re-arrange some logic to lessen the number of checks. With logical ANDs put the least likely first, with logical ORs put the most likely first. Also add conditional hints on the assumed fastpath. Signed-off-by: Douglas Gilbert <dgilbert@interlog.com> --- drivers/scsi/sd.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-)