diff mbox series

[v3,4/6] scsi: Retry unaligned zoned writes

Message ID 20230726005742.303865-5-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Improve the performance of F2FS on zoned UFS | expand

Commit Message

Bart Van Assche July 26, 2023, 12:57 a.m. UTC
From ZBC-2: "The device server terminates with CHECK CONDITION status, with
the sense key set to ILLEGAL REQUEST, and the additional sense code set to
UNALIGNED WRITE COMMAND a write command, other than an entire medium write
same command, that specifies: a) the starting LBA in a sequential write
required zone set to a value that is not equal to the write pointer for
that sequential write required zone; or b) an ending LBA that is not equal
to the last logical block within a physical block (see SBC-5)."

Send commands that failed with an unaligned write error to the SCSI error
handler. Let the SCSI error handler sort SCSI commands per LBA before
resubmitting these.

Increase the number of retries for write commands sent to a sequential
zone to the maximum number of outstanding commands.

Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/scsi/scsi_error.c | 37 +++++++++++++++++++++++++++++++++++++
 drivers/scsi/scsi_lib.c   |  1 +
 drivers/scsi/sd.c         |  3 +++
 include/scsi/scsi.h       |  1 +
 4 files changed, 42 insertions(+)

Comments

Damien Le Moal July 26, 2023, 8:47 a.m. UTC | #1
On 7/26/23 09:57, Bart Van Assche wrote:
> From ZBC-2: "The device server terminates with CHECK CONDITION status, with
> the sense key set to ILLEGAL REQUEST, and the additional sense code set to
> UNALIGNED WRITE COMMAND a write command, other than an entire medium write
> same command, that specifies: a) the starting LBA in a sequential write
> required zone set to a value that is not equal to the write pointer for
> that sequential write required zone; or b) an ending LBA that is not equal
> to the last logical block within a physical block (see SBC-5)."
> 
> Send commands that failed with an unaligned write error to the SCSI error
> handler. Let the SCSI error handler sort SCSI commands per LBA before
> resubmitting these.
> 
> Increase the number of retries for write commands sent to a sequential
> zone to the maximum number of outstanding commands.
> 
> Cc: Martin K. Petersen <martin.petersen@oracle.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/scsi/scsi_error.c | 37 +++++++++++++++++++++++++++++++++++++
>  drivers/scsi/scsi_lib.c   |  1 +
>  drivers/scsi/sd.c         |  3 +++
>  include/scsi/scsi.h       |  1 +
>  4 files changed, 42 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index c67cdcdc3ba8..2b9aec05dc36 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -27,6 +27,7 @@
>  #include <linux/blkdev.h>
>  #include <linux/delay.h>
>  #include <linux/jiffies.h>
> +#include <linux/list_sort.h>
>  
>  #include <scsi/scsi.h>
>  #include <scsi/scsi_cmnd.h>
> @@ -698,6 +699,17 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd)
>  		fallthrough;
>  
>  	case ILLEGAL_REQUEST:
> +		/*
> +		 * Unaligned write command. This indicates that zoned writes
> +		 * have been received by the device in the wrong order. If zone
> +		 * write locking is disabled, retry after all pending commands
> +		 * have completed.
> +		 */
> +		if (sshdr.asc == 0x21 && sshdr.ascq == 0x04 &&
> +		    blk_queue_no_zone_write_lock(sdev->request_queue) &&
> +		    !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd))
> +			return NEEDS_DELAYED_RETRY;
> +
>  		if (sshdr.asc == 0x20 || /* Invalid command operation code */
>  		    sshdr.asc == 0x21 || /* Logical block address out of range */
>  		    sshdr.asc == 0x22 || /* Invalid function */
> @@ -2223,6 +2235,25 @@ void scsi_eh_flush_done_q(struct list_head *done_q)
>  }
>  EXPORT_SYMBOL(scsi_eh_flush_done_q);
>  
> +/*
> + * Returns a negative value if @_a has a lower LBA than @_b, zero if
> + * both have the same LBA and a positive value otherwise.
> + */
> +static int scsi_cmp_lba(void *priv, const struct list_head *_a,
> +			const struct list_head *_b)

The argument priv is unused.

> +{
> +	struct scsi_cmnd *a = list_entry(_a, typeof(*a), eh_entry);
> +	struct scsi_cmnd *b = list_entry(_b, typeof(*b), eh_entry);
> +	const sector_t pos_a = blk_rq_pos(scsi_cmd_to_rq(a));
> +	const sector_t pos_b = blk_rq_pos(scsi_cmd_to_rq(b));
> +
> +	if (pos_a < pos_b)
> +		return -1;
> +	if (pos_a > pos_b)
> +		return 1;
> +	return 0;
> +}
> +
>  /**
>   * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
>   * @shost:	Host to unjam.
> @@ -2258,6 +2289,12 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
>  
>  	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
>  
> +	/*
> +	 * Sort pending SCSI commands in LBA order. This is important if zone
> +	 * write locking is disabled for a zoned SCSI device.
> +	 */
> +	list_sort(NULL, &eh_work_q, scsi_cmp_lba);

Should we do this only for zoned devices ?

> +
>  	if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
>  		scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
>  
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 59176946ab56..69da8aee13df 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -1443,6 +1443,7 @@ static void scsi_complete(struct request *rq)
>  	case ADD_TO_MLQUEUE:
>  		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
>  		break;
> +	case NEEDS_DELAYED_RETRY:
>  	default:
>  		scsi_eh_scmd_add(cmd);
>  		break;
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 68b12afa0721..27b9ebe05b90 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -1235,6 +1235,9 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>  	cmd->transfersize = sdp->sector_size;
>  	cmd->underflow = nr_blocks << 9;
>  	cmd->allowed = sdkp->max_retries;
> +	if (blk_queue_no_zone_write_lock(rq->q) &&
> +	    blk_rq_is_seq_zoned_write(rq))
> +		cmd->allowed += rq->q->nr_requests;

Aouch... that could be a lot...

>  	cmd->sdb.length = nr_blocks * sdp->sector_size;
>  
>  	SCSI_LOG_HLQUEUE(1,
> diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
> index ec093594ba53..6600db046227 100644
> --- a/include/scsi/scsi.h
> +++ b/include/scsi/scsi.h
> @@ -93,6 +93,7 @@ static inline int scsi_status_is_check_condition(int status)
>   * Internal return values.
>   */
>  enum scsi_disposition {
> +	NEEDS_DELAYED_RETRY	= 0x2000,
>  	NEEDS_RETRY		= 0x2001,
>  	SUCCESS			= 0x2002,
>  	FAILED			= 0x2003,
Bart Van Assche July 26, 2023, 3:02 p.m. UTC | #2
On 7/26/23 01:47, Damien Le Moal wrote:
> On 7/26/23 09:57, Bart Van Assche wrote:
>> +/*
>> + * Returns a negative value if @_a has a lower LBA than @_b, zero if
>> + * both have the same LBA and a positive value otherwise.
>> + */
>> +static int scsi_cmp_lba(void *priv, const struct list_head *_a,
>> +			const struct list_head *_b)
> 
> The argument priv is unused.

I cannot remove the 'priv' argument. From include/linux/list_sort.h:

typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *,
		const struct list_head *, const struct list_head *);

__attribute__((nonnull(2,3)))
void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp);

>>   /**
>>    * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
>>    * @shost:	Host to unjam.
>> @@ -2258,6 +2289,12 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
>>   
>>   	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
>>   
>> +	/*
>> +	 * Sort pending SCSI commands in LBA order. This is important if zone
>> +	 * write locking is disabled for a zoned SCSI device.
>> +	 */
>> +	list_sort(NULL, &eh_work_q, scsi_cmp_lba);
> 
> Should we do this only for zoned devices ?

I'm not sure this is possible. Error handling happens per SCSI host. 
Some of the logical units associated with a host may be zoned while 
others may represent conventional storage or have no storage associated 
with them (WLUN).

Thanks,

Bart.
Damien Le Moal July 26, 2023, 11:46 p.m. UTC | #3
On 7/27/23 00:02, Bart Van Assche wrote:
> On 7/26/23 01:47, Damien Le Moal wrote:
>> On 7/26/23 09:57, Bart Van Assche wrote:
>>> +/*
>>> + * Returns a negative value if @_a has a lower LBA than @_b, zero if
>>> + * both have the same LBA and a positive value otherwise.
>>> + */
>>> +static int scsi_cmp_lba(void *priv, const struct list_head *_a,
>>> +            const struct list_head *_b)
>>
>> The argument priv is unused.
> 
> I cannot remove the 'priv' argument. From include/linux/list_sort.h:
> 
> typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *,
>         const struct list_head *, const struct list_head *);
> 
> __attribute__((nonnull(2,3)))
> void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp);

Yes. I missed that this is a callbalck. Sorry for the noise.

> 
>>>   /**
>>>    * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
>>>    * @shost:    Host to unjam.
>>> @@ -2258,6 +2289,12 @@ static void scsi_unjam_host(struct Scsi_Host *shost)
>>>         SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
>>>   +    /*
>>> +     * Sort pending SCSI commands in LBA order. This is important if zone
>>> +     * write locking is disabled for a zoned SCSI device.
>>> +     */
>>> +    list_sort(NULL, &eh_work_q, scsi_cmp_lba);
>>
>> Should we do this only for zoned devices ?
> 
> I'm not sure this is possible. Error handling happens per SCSI host. Some of
> the logical units associated with a host may be zoned while others may
> represent conventional storage or have no storage associated with them (WLUN).
> 
> Thanks,
> 
> Bart.
diff mbox series

Patch

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index c67cdcdc3ba8..2b9aec05dc36 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -27,6 +27,7 @@ 
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/jiffies.h>
+#include <linux/list_sort.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -698,6 +699,17 @@  enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd)
 		fallthrough;
 
 	case ILLEGAL_REQUEST:
+		/*
+		 * Unaligned write command. This indicates that zoned writes
+		 * have been received by the device in the wrong order. If zone
+		 * write locking is disabled, retry after all pending commands
+		 * have completed.
+		 */
+		if (sshdr.asc == 0x21 && sshdr.ascq == 0x04 &&
+		    blk_queue_no_zone_write_lock(sdev->request_queue) &&
+		    !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd))
+			return NEEDS_DELAYED_RETRY;
+
 		if (sshdr.asc == 0x20 || /* Invalid command operation code */
 		    sshdr.asc == 0x21 || /* Logical block address out of range */
 		    sshdr.asc == 0x22 || /* Invalid function */
@@ -2223,6 +2235,25 @@  void scsi_eh_flush_done_q(struct list_head *done_q)
 }
 EXPORT_SYMBOL(scsi_eh_flush_done_q);
 
+/*
+ * Returns a negative value if @_a has a lower LBA than @_b, zero if
+ * both have the same LBA and a positive value otherwise.
+ */
+static int scsi_cmp_lba(void *priv, const struct list_head *_a,
+			const struct list_head *_b)
+{
+	struct scsi_cmnd *a = list_entry(_a, typeof(*a), eh_entry);
+	struct scsi_cmnd *b = list_entry(_b, typeof(*b), eh_entry);
+	const sector_t pos_a = blk_rq_pos(scsi_cmd_to_rq(a));
+	const sector_t pos_b = blk_rq_pos(scsi_cmd_to_rq(b));
+
+	if (pos_a < pos_b)
+		return -1;
+	if (pos_a > pos_b)
+		return 1;
+	return 0;
+}
+
 /**
  * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
  * @shost:	Host to unjam.
@@ -2258,6 +2289,12 @@  static void scsi_unjam_host(struct Scsi_Host *shost)
 
 	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
 
+	/*
+	 * Sort pending SCSI commands in LBA order. This is important if zone
+	 * write locking is disabled for a zoned SCSI device.
+	 */
+	list_sort(NULL, &eh_work_q, scsi_cmp_lba);
+
 	if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
 		scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 59176946ab56..69da8aee13df 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1443,6 +1443,7 @@  static void scsi_complete(struct request *rq)
 	case ADD_TO_MLQUEUE:
 		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
 		break;
+	case NEEDS_DELAYED_RETRY:
 	default:
 		scsi_eh_scmd_add(cmd);
 		break;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 68b12afa0721..27b9ebe05b90 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1235,6 +1235,9 @@  static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 	cmd->transfersize = sdp->sector_size;
 	cmd->underflow = nr_blocks << 9;
 	cmd->allowed = sdkp->max_retries;
+	if (blk_queue_no_zone_write_lock(rq->q) &&
+	    blk_rq_is_seq_zoned_write(rq))
+		cmd->allowed += rq->q->nr_requests;
 	cmd->sdb.length = nr_blocks * sdp->sector_size;
 
 	SCSI_LOG_HLQUEUE(1,
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index ec093594ba53..6600db046227 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -93,6 +93,7 @@  static inline int scsi_status_is_check_condition(int status)
  * Internal return values.
  */
 enum scsi_disposition {
+	NEEDS_DELAYED_RETRY	= 0x2000,
 	NEEDS_RETRY		= 0x2001,
 	SUCCESS			= 0x2002,
 	FAILED			= 0x2003,