diff mbox series

[RFC,4/5] streamline some logical operations

Message ID 20180806045115.7725-5-dgilbert@interlog.com (mailing list archive)
State Deferred
Headers show
Series sd: init_command() and sd_done() speed-ups | expand

Commit Message

Douglas Gilbert Aug. 6, 2018, 4:51 a.m. UTC
Re-arrange some logic to lessen the number of checks. With logical
ANDs put the least likely first, with logical ORs put the most
likely first. Also add conditional hints on the assumed fastpath.

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
---
 drivers/scsi/sd.c | 43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

Comments

Damien Le Moal Aug. 6, 2018, 5:41 a.m. UTC | #1
On 2018/08/06 13:51, Douglas Gilbert wrote:
> Re-arrange some logic to lessen the number of checks. With logical
> ANDs put the least likely first, with logical ORs put the most
> likely first. Also add conditional hints on the assumed fastpath.
> 
> Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
> ---
>  drivers/scsi/sd.c | 43 ++++++++++++++++++++++++-------------------
>  1 file changed, 24 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 9f047fd3c92d..05014054e357 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1171,9 +1171,9 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>  
>  	fua = (rq->cmd_flags & REQ_FUA) ? 0x8 : 0;
>  	dix = scsi_prot_sg_count(cmd);
> -	dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
> +	dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
>  
> -	if (write && dix)
> +	if (dix && write)
>  		sd_dif_prepare(cmd);
>  
>  	if (dif || dix)
> @@ -1181,19 +1181,27 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>  	else
>  		protect = 0;
>  
> -	if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
> +	if (unlikely(protect &&
> +		     sdkp->protection_type == T10_PI_TYPE2_PROTECTION))
>  		ret = sd_setup_read_write_32_cmnd(cmd, write, lba, nr_blocks,
>  						  protect | fua);
> -	} else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) {
> +	else if (sdp->use_16_for_rw)
>  		ret = sd_setup_read_write_16_cmnd(cmd, write, lba, nr_blocks,
>  						  protect | fua);

So here, without use_16_for_rw being forced on (which is the case for most disks
I think, except ZBC disks which mandate it) or most disks, all read/write to low
LBAs will have to go through a longer chain of if/else if... Is this change
really such a gain in average ? It looks like this will be a loss for the first
small partition at the beginning of the disk.

> -	} else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw
> -		   || protect) {
> -		ret = sd_setup_read_write_10_cmnd(cmd, write, lba, nr_blocks,
> -						  protect | fua);
> -	} else {
> -		ret = sd_setup_read_write_6_cmnd(cmd, write, lba, nr_blocks,
> -						 protect | fua);
> +	else if (likely(nr_blocks < 0x100)) {
> +		if (sdp->use_10_for_rw || (lba > 0x1fffff) || protect)
> +			ret = sd_setup_read_write_10_cmnd(cmd, write, lba,
> +						 nr_blocks, protect | fua);
> +		else
> +			ret = sd_setup_read_write_6_cmnd(cmd, write, lba,
> +						 nr_blocks, protect | fua);
> +	} else {	/* not already done and nr_blocks > 0xff */
> +		if (unlikely(nr_blocks > 0xffff))
> +			ret = sd_setup_read_write_16_cmnd(cmd, write, lba,
> +						 nr_blocks, protect | fua);
> +		else
> +			ret = sd_setup_read_write_10_cmnd(cmd, write, lba,
> +						 nr_blocks, protect | fua);
>  	}
>  
>  	if (ret != BLKPREP_OK)
> @@ -1976,7 +1984,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
>  	struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
>  	struct request *req = SCpnt->request;
>  	int sense_valid = 0;
> -	int sense_deferred = 0;
>  
>  	/*
>  	 * Assumption that REQ_OP_READ and REQ_OP_WRITE are 0 and 1 is
> @@ -2030,16 +2037,14 @@ static int sd_done(struct scsi_cmnd *SCpnt)
>  		}
>  	}
>  
> -	if (result) {
> +	if (unlikely(result)) {
>  		sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
> -		if (sense_valid)
> -			sense_deferred = scsi_sense_is_deferred(&sshdr);
> +		if (driver_byte(result) == DRIVER_SENSE ||
> +		    (sense_valid && (!scsi_sense_is_deferred(&sshdr))))
> +			good_bytes = sd_done_sense(SCpnt, good_bytes, &sshdr);
>  	}
> -	sdkp->medium_access_timed_out = 0;
>  
> -	if (unlikely(driver_byte(result) == DRIVER_SENSE ||
> -		     (sense_valid && !sense_deferred)))
> -		good_bytes = sd_done_sense(SCpnt, good_bytes, &sshdr);
> +	sdkp->medium_access_timed_out = 0;
>  
>  	if (sd_is_zoned(sdkp))
>  		sd_zbc_complete(SCpnt, good_bytes, &sshdr);
>
Douglas Gilbert Aug. 6, 2018, 4:13 p.m. UTC | #2
On 2018-08-06 01:41 AM, Damien Le Moal wrote:
> On 2018/08/06 13:51, Douglas Gilbert wrote:
>> Re-arrange some logic to lessen the number of checks. With logical
>> ANDs put the least likely first, with logical ORs put the most
>> likely first. Also add conditional hints on the assumed fastpath.
>>
>> Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
>> ---
>>   drivers/scsi/sd.c | 43 ++++++++++++++++++++++++-------------------
>>   1 file changed, 24 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
>> index 9f047fd3c92d..05014054e357 100644
>> --- a/drivers/scsi/sd.c
>> +++ b/drivers/scsi/sd.c
>> @@ -1171,9 +1171,9 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>>   
>>   	fua = (rq->cmd_flags & REQ_FUA) ? 0x8 : 0;
>>   	dix = scsi_prot_sg_count(cmd);
>> -	dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
>> +	dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
>>   
>> -	if (write && dix)
>> +	if (dix && write)
>>   		sd_dif_prepare(cmd);
>>   
>>   	if (dif || dix)
>> @@ -1181,19 +1181,27 @@ static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>>   	else
>>   		protect = 0;
>>   
>> -	if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
>> +	if (unlikely(protect &&
>> +		     sdkp->protection_type == T10_PI_TYPE2_PROTECTION))
>>   		ret = sd_setup_read_write_32_cmnd(cmd, write, lba, nr_blocks,
>>   						  protect | fua);
>> -	} else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) {
>> +	else if (sdp->use_16_for_rw)
>>   		ret = sd_setup_read_write_16_cmnd(cmd, write, lba, nr_blocks,
>>   						  protect | fua);
> 
> So here, without use_16_for_rw being forced on (which is the case for most disks
> I think, except ZBC disks which mandate it) or most disks, all read/write to low
> LBAs will have to go through a longer chain of if/else if... Is this change
> really such a gain in average ? It looks like this will be a loss for the first
> small partition at the beginning of the disk.

For comparison I assume:
   protect = 0
   (nr_blocks < 0x100) == 1
   (use_16_for_rw || use_10_for_rw) == 1

So for the use_16_for_rw == 0 and lba <= 0x1fffff case I count:
   old: 6    proposed: 4
comparisons when use_10_for_rw == 1. The old figure could be reduced to
4 by re-ordering the comparisons on its use_10_for_rw "else if" line
(i.e. place use_10_for_rw first rather than third).

I'm not so concerned about the speed of READ(6) ... no SSD should be
using that deprecated command. That said, I think the proposed code
is slightly faster because the old code checks that both
nr_blocks > 0xffff and nr_blocks > 0xff fail. The proposed code only
checks the latter en route to the READ(6).


Also I think the proposed small decision tree is slightly easier to
follow (and hence maintain) than the existing "flat" if/else chain
which is deceptively complex. So more a code clean-up than a
speed-up :-)

Thanks for the feedback.
Doug Gilbert


BTW the major code speed improvement is not in this patchset but an earlier
patch to convert the opened coded shifts to put_unaligned_be*() calls, IMO.

>> -	} else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw
>> -		   || protect) {
>> -		ret = sd_setup_read_write_10_cmnd(cmd, write, lba, nr_blocks,
>> -						  protect | fua);
>> -	} else {
>> -		ret = sd_setup_read_write_6_cmnd(cmd, write, lba, nr_blocks,
>> -						 protect | fua);
>> +	else if (likely(nr_blocks < 0x100)) {
>> +		if (sdp->use_10_for_rw || (lba > 0x1fffff) || protect)
>> +			ret = sd_setup_read_write_10_cmnd(cmd, write, lba,
>> +						 nr_blocks, protect | fua);
>> +		else
>> +			ret = sd_setup_read_write_6_cmnd(cmd, write, lba,
>> +						 nr_blocks, protect | fua);
>> +	} else {	/* not already done and nr_blocks > 0xff */
>> +		if (unlikely(nr_blocks > 0xffff))
>> +			ret = sd_setup_read_write_16_cmnd(cmd, write, lba,
>> +						 nr_blocks, protect | fua);
>> +		else
>> +			ret = sd_setup_read_write_10_cmnd(cmd, write, lba,
>> +						 nr_blocks, protect | fua);
>>   	}
diff mbox series

Patch

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9f047fd3c92d..05014054e357 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1171,9 +1171,9 @@  static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 
 	fua = (rq->cmd_flags & REQ_FUA) ? 0x8 : 0;
 	dix = scsi_prot_sg_count(cmd);
-	dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
+	dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
 
-	if (write && dix)
+	if (dix && write)
 		sd_dif_prepare(cmd);
 
 	if (dif || dix)
@@ -1181,19 +1181,27 @@  static int sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 	else
 		protect = 0;
 
-	if (protect && sdkp->protection_type == T10_PI_TYPE2_PROTECTION) {
+	if (unlikely(protect &&
+		     sdkp->protection_type == T10_PI_TYPE2_PROTECTION))
 		ret = sd_setup_read_write_32_cmnd(cmd, write, lba, nr_blocks,
 						  protect | fua);
-	} else if (sdp->use_16_for_rw || (nr_blocks > 0xffff)) {
+	else if (sdp->use_16_for_rw)
 		ret = sd_setup_read_write_16_cmnd(cmd, write, lba, nr_blocks,
 						  protect | fua);
-	} else if ((nr_blocks > 0xff) || (lba > 0x1fffff) || sdp->use_10_for_rw
-		   || protect) {
-		ret = sd_setup_read_write_10_cmnd(cmd, write, lba, nr_blocks,
-						  protect | fua);
-	} else {
-		ret = sd_setup_read_write_6_cmnd(cmd, write, lba, nr_blocks,
-						 protect | fua);
+	else if (likely(nr_blocks < 0x100)) {
+		if (sdp->use_10_for_rw || (lba > 0x1fffff) || protect)
+			ret = sd_setup_read_write_10_cmnd(cmd, write, lba,
+						 nr_blocks, protect | fua);
+		else
+			ret = sd_setup_read_write_6_cmnd(cmd, write, lba,
+						 nr_blocks, protect | fua);
+	} else {	/* not already done and nr_blocks > 0xff */
+		if (unlikely(nr_blocks > 0xffff))
+			ret = sd_setup_read_write_16_cmnd(cmd, write, lba,
+						 nr_blocks, protect | fua);
+		else
+			ret = sd_setup_read_write_10_cmnd(cmd, write, lba,
+						 nr_blocks, protect | fua);
 	}
 
 	if (ret != BLKPREP_OK)
@@ -1976,7 +1984,6 @@  static int sd_done(struct scsi_cmnd *SCpnt)
 	struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
 	struct request *req = SCpnt->request;
 	int sense_valid = 0;
-	int sense_deferred = 0;
 
 	/*
 	 * Assumption that REQ_OP_READ and REQ_OP_WRITE are 0 and 1 is
@@ -2030,16 +2037,14 @@  static int sd_done(struct scsi_cmnd *SCpnt)
 		}
 	}
 
-	if (result) {
+	if (unlikely(result)) {
 		sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
-		if (sense_valid)
-			sense_deferred = scsi_sense_is_deferred(&sshdr);
+		if (driver_byte(result) == DRIVER_SENSE ||
+		    (sense_valid && (!scsi_sense_is_deferred(&sshdr))))
+			good_bytes = sd_done_sense(SCpnt, good_bytes, &sshdr);
 	}
-	sdkp->medium_access_timed_out = 0;
 
-	if (unlikely(driver_byte(result) == DRIVER_SENSE ||
-		     (sense_valid && !sense_deferred)))
-		good_bytes = sd_done_sense(SCpnt, good_bytes, &sshdr);
+	sdkp->medium_access_timed_out = 0;
 
 	if (sd_is_zoned(sdkp))
 		sd_zbc_complete(SCpnt, good_bytes, &sshdr);