diff mbox series

[v8,5/9] scsi: core: Retry unaligned zoned writes

Message ID 20230811213604.548235-6-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Improve performance for zoned UFS devices | expand

Commit Message

Bart Van Assche Aug. 11, 2023, 9:35 p.m. UTC
If zoned writes (REQ_OP_WRITE) for a sequential write required zone have
a starting LBA that differs from the write pointer, e.g. because zoned
writes have been reordered, then the storage device will respond with an
UNALIGNED WRITE COMMAND error. Send commands that failed with an
unaligned write error to the SCSI error handler if zone write locking is
disabled. Let the SCSI error handler sort SCSI commands per LBA before
resubmitting these.

If zone write locking is disabled, increase the number of retries for
write commands sent to a sequential zone to the maximum number of
outstanding commands because in the worst case the number of times
reordered zoned writes have to be retried is (number of outstanding
writes per sequential zone) - 1.

Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Damien Le Moal <dlemoal@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/scsi_error.c | 16 ++++++++++++++++
 drivers/scsi/scsi_lib.c   |  1 +
 drivers/scsi/sd.c         |  2 ++
 include/scsi/scsi.h       |  1 +
 4 files changed, 20 insertions(+)

Comments

Damien Le Moal Aug. 14, 2023, 12:36 p.m. UTC | #1
On 8/12/23 06:35, Bart Van Assche wrote:
> If zoned writes (REQ_OP_WRITE) for a sequential write required zone have
> a starting LBA that differs from the write pointer, e.g. because zoned
> writes have been reordered, then the storage device will respond with an
> UNALIGNED WRITE COMMAND error. Send commands that failed with an
> unaligned write error to the SCSI error handler if zone write locking is
> disabled. Let the SCSI error handler sort SCSI commands per LBA before
> resubmitting these.
> 
> If zone write locking is disabled, increase the number of retries for
> write commands sent to a sequential zone to the maximum number of
> outstanding commands because in the worst case the number of times
> reordered zoned writes have to be retried is (number of outstanding
> writes per sequential zone) - 1.
> 
> Cc: Martin K. Petersen <martin.petersen@oracle.com>
> Cc: Damien Le Moal <dlemoal@kernel.org>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/scsi_error.c | 16 ++++++++++++++++
>  drivers/scsi/scsi_lib.c   |  1 +
>  drivers/scsi/sd.c         |  2 ++
>  include/scsi/scsi.h       |  1 +
>  4 files changed, 20 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 0d7835bdc8af..7ae43fac07b7 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -699,6 +699,22 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd)
>  		fallthrough;
>  
>  	case ILLEGAL_REQUEST:
> +		/*
> +		 * Unaligned write command. This may indicate that zoned writes
> +		 * have been received by the device in the wrong order. If zone
> +		 * write locking is disabled, retry after all pending commands
> +		 * have completed.
> +		 */
> +		if (sshdr.asc == 0x21 && sshdr.ascq == 0x04 &&
> +		    !req->q->limits.use_zone_write_lock &&
> +		    blk_rq_is_seq_zoned_write(req)) {
> +			SCSI_LOG_ERROR_RECOVERY(3,
> +				sdev_printk(KERN_INFO, scmd->device,
> +					    "Retrying unaligned write at LBA %#llx.\n",
> +					    scsi_get_lba(scmd)));
> +			return NEEDS_DELAYED_RETRY;
> +		}
> +
>  		if (sshdr.asc == 0x20 || /* Invalid command operation code */
>  		    sshdr.asc == 0x21 || /* Logical block address out of range */
>  		    sshdr.asc == 0x22 || /* Invalid function */
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 59176946ab56..69da8aee13df 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1443,6 +1443,7 @@ static void scsi_complete(struct request *rq)
>  	case ADD_TO_MLQUEUE:
>  		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
>  		break;
> +	case NEEDS_DELAYED_RETRY:
>  	default:
>  		scsi_eh_scmd_add(cmd);
>  		break;
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 4d9c6ad11cca..c8466c6c7387 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1238,6 +1238,8 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>  	cmd->transfersize = sdp->sector_size;
>  	cmd->underflow = nr_blocks << 9;
>  	cmd->allowed = sdkp->max_retries;
> +	if (!rq->q->limits.use_zone_write_lock && blk_rq_is_seq_zoned_write(rq))

This condition could be written as a little inline helper
blk_req_need_zone_write_lock(), which could be used in mq-dealine patch 2.

> +		cmd->allowed += rq->q->nr_requests;
>  	cmd->sdb.length = nr_blocks * sdp->sector_size;
>  
>  	SCSI_LOG_HLQUEUE(1,
> diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
> index ec093594ba53..6600db046227 100644
> --- a/include/scsi/scsi.h
> +++ b/include/scsi/scsi.h
> @@ -93,6 +93,7 @@ static inline int scsi_status_is_check_condition(int status)
>   * Internal return values.
>   */
>  enum scsi_disposition {
> +	NEEDS_DELAYED_RETRY	= 0x2000,
>  	NEEDS_RETRY		= 0x2001,
>  	SUCCESS			= 0x2002,
>  	FAILED			= 0x2003,
Bart Van Assche Aug. 14, 2023, 5:57 p.m. UTC | #2
On 8/14/23 05:36, Damien Le Moal wrote:
> On 8/12/23 06:35, Bart Van Assche wrote:
>> +	if (!rq->q->limits.use_zone_write_lock && blk_rq_is_seq_zoned_write(rq))
> 
> This condition could be written as a little inline helper
> blk_req_need_zone_write_lock(), which could be used in mq-dealine patch 2.

Hi Damien,

Since q->limits.use_zone_write_lock is being introduced, how about
modifying blk_req_needs_zone_write_lock() such that it tests that new member
variable instead of checking rq->q->disk->seq_zones_wlock? That would allow
me to leave out one change from block/mq-deadline.c. I do not have a strong
opinion about whether the name blk_req_needs_zone_write_lock() should be
retained or whether that function should be renamed into
blk_req_use_zone_write_lock().

Thanks,

Bart.
Damien Le Moal Aug. 15, 2023, 1:52 a.m. UTC | #3
On 8/15/23 02:57, Bart Van Assche wrote:
> On 8/14/23 05:36, Damien Le Moal wrote:
>> On 8/12/23 06:35, Bart Van Assche wrote:
>>> +	if (!rq->q->limits.use_zone_write_lock && blk_rq_is_seq_zoned_write(rq))
>>
>> This condition could be written as a little inline helper
>> blk_req_need_zone_write_lock(), which could be used in mq-dealine patch 2.
> 
> Hi Damien,
> 
> Since q->limits.use_zone_write_lock is being introduced, how about
> modifying blk_req_needs_zone_write_lock() such that it tests that new member
> variable instead of checking rq->q->disk->seq_zones_wlock? That would allow
> me to leave out one change from block/mq-deadline.c. I do not have a strong
> opinion about whether the name blk_req_needs_zone_write_lock() should be
> retained or whether that function should be renamed into
> blk_req_use_zone_write_lock().

Something like this ?

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 619ee41a51cc..a3980a71c0ac 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -57,7 +57,12 @@ EXPORT_SYMBOL_GPL(blk_zone_cond_str);
  */
 bool blk_req_needs_zone_write_lock(struct request *rq)
 {
-       if (!rq->q->disk->seq_zones_wlock)
+       struct request_queue *q = rq->q;
+
+       if (!q->limits.use_zone_write_lock)
+               return false;
+
+       if (!q->disk->seq_zones_wlock)
                return false;

        return blk_rq_is_seq_zoned_write(rq);

I think that is fine and avoids adding yet another helper.
I am OK with this.
Bart Van Assche Aug. 15, 2023, 5:29 p.m. UTC | #4
On 8/14/23 05:36, Damien Le Moal wrote:
> On 8/12/23 06:35, Bart Van Assche wrote:
>> --- a/drivers/scsi/sd.c
>> +++ b/drivers/scsi/sd.c
>> @@ -1238,6 +1238,8 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>>   	cmd->transfersize = sdp->sector_size;
>>   	cmd->underflow = nr_blocks << 9;
>>   	cmd->allowed = sdkp->max_retries;
>> +	if (!rq->q->limits.use_zone_write_lock && blk_rq_is_seq_zoned_write(rq))
> 
> This condition could be written as a little inline helper
> blk_req_need_zone_write_lock(), which could be used in mq-dealine patch 2.

The above test differs from the tests in the mq-deadline I/O scheduler.
The mq-deadline I/O scheduler uses write locking if
rq->q->limits.use_zone_write_lock && q->disk->seq_zones_wlock &&
blk_rq_is_seq_zoned_write(rq). The above test is different and occurs
two times in scsi_error.c and one time in sd.c. Do you still want me to
introduce a helper function for that expression?

Thanks,

Bart.
Damien Le Moal Aug. 16, 2023, 1:13 a.m. UTC | #5
On 8/16/23 02:29, Bart Van Assche wrote:
> On 8/14/23 05:36, Damien Le Moal wrote:
>> On 8/12/23 06:35, Bart Van Assche wrote:
>>> --- a/drivers/scsi/sd.c
>>> +++ b/drivers/scsi/sd.c
>>> @@ -1238,6 +1238,8 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>>>   	cmd->transfersize = sdp->sector_size;
>>>   	cmd->underflow = nr_blocks << 9;
>>>   	cmd->allowed = sdkp->max_retries;
>>> +	if (!rq->q->limits.use_zone_write_lock && blk_rq_is_seq_zoned_write(rq))
>>
>> This condition could be written as a little inline helper
>> blk_req_need_zone_write_lock(), which could be used in mq-dealine patch 2.
> 
> The above test differs from the tests in the mq-deadline I/O scheduler.
> The mq-deadline I/O scheduler uses write locking if
> rq->q->limits.use_zone_write_lock && q->disk->seq_zones_wlock &&
> blk_rq_is_seq_zoned_write(rq). The above test is different and occurs
> two times in scsi_error.c and one time in sd.c. Do you still want me to
> introduce a helper function for that expression?

May be not needed. But a comment explaining it may be good to add.
I still think that added the test for use_zone_write_lock in
blk_req_needs_zone_write_lock() is needed though, for consistency of all
functions related to zone locking.

> 
> Thanks,
> 
> Bart.
Bart Van Assche Aug. 16, 2023, 7:59 p.m. UTC | #6
On 8/15/23 18:13, Damien Le Moal wrote:
> I still think that added the test for use_zone_write_lock in
> blk_req_needs_zone_write_lock() is needed though, for consistency of all
> functions related to zone locking.

This has been implemented in v9 of this patch series. v9 has just been 
posted.

Thanks,

Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 0d7835bdc8af..7ae43fac07b7 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -699,6 +699,22 @@  enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd)
 		fallthrough;
 
 	case ILLEGAL_REQUEST:
+		/*
+		 * Unaligned write command. This may indicate that zoned writes
+		 * have been received by the device in the wrong order. If zone
+		 * write locking is disabled, retry after all pending commands
+		 * have completed.
+		 */
+		if (sshdr.asc == 0x21 && sshdr.ascq == 0x04 &&
+		    !req->q->limits.use_zone_write_lock &&
+		    blk_rq_is_seq_zoned_write(req)) {
+			SCSI_LOG_ERROR_RECOVERY(3,
+				sdev_printk(KERN_INFO, scmd->device,
+					    "Retrying unaligned write at LBA %#llx.\n",
+					    scsi_get_lba(scmd)));
+			return NEEDS_DELAYED_RETRY;
+		}
+
 		if (sshdr.asc == 0x20 || /* Invalid command operation code */
 		    sshdr.asc == 0x21 || /* Logical block address out of range */
 		    sshdr.asc == 0x22 || /* Invalid function */
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 59176946ab56..69da8aee13df 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1443,6 +1443,7 @@  static void scsi_complete(struct request *rq)
 	case ADD_TO_MLQUEUE:
 		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
 		break;
+	case NEEDS_DELAYED_RETRY:
 	default:
 		scsi_eh_scmd_add(cmd);
 		break;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 4d9c6ad11cca..c8466c6c7387 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1238,6 +1238,8 @@  static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 	cmd->transfersize = sdp->sector_size;
 	cmd->underflow = nr_blocks << 9;
 	cmd->allowed = sdkp->max_retries;
+	if (!rq->q->limits.use_zone_write_lock && blk_rq_is_seq_zoned_write(rq))
+		cmd->allowed += rq->q->nr_requests;
 	cmd->sdb.length = nr_blocks * sdp->sector_size;
 
 	SCSI_LOG_HLQUEUE(1,
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index ec093594ba53..6600db046227 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -93,6 +93,7 @@  static inline int scsi_status_is_check_condition(int status)
  * Internal return values.
  */
 enum scsi_disposition {
+	NEEDS_DELAYED_RETRY	= 0x2000,
 	NEEDS_RETRY		= 0x2001,
 	SUCCESS			= 0x2002,
 	FAILED			= 0x2003,