diff mbox

[20/23] scsi_dh_alua: Recheck state on unit attention

Message ID 1440679281-13234-21-git-send-email-hare@suse.de (mailing list archive)
State New, archived
Headers show

Commit Message

Hannes Reinecke Aug. 27, 2015, 12:41 p.m. UTC
When we receive a unit attention code of 'ALUA state changed'
we should recheck the state, as it might be due to an implicit
ALUA state transition.
At the same time a workqueue item might already be queued, which
should be started immediately to avoid any delays.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 48 +++++++++++++++---------------
 1 file changed, 24 insertions(+), 24 deletions(-)

Comments

Christoph Hellwig Sept. 1, 2015, 10:31 a.m. UTC | #1
> -			alua_check(sdev);
> -			return ADD_TO_MLQUEUE;
> +			alua_check(sdev, false);
> +			return NEEDS_RETRY;

What's the reason for the change in return value here?
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ewan Milne Sept. 22, 2015, 7:57 p.m. UTC | #2
On Thu, 2015-08-27 at 14:41 +0200, Hannes Reinecke wrote:
> When we receive a unit attention code of 'ALUA state changed'
> we should recheck the state, as it might be due to an implicit
> ALUA state transition.
> At the same time a workqueue item might already be queued, which
> should be started immediately to avoid any delays.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/scsi/device_handler/scsi_dh_alua.c | 48 +++++++++++++++---------------
>  1 file changed, 24 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
> index 85fd597..8717fd3 100644
> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> @@ -118,7 +118,7 @@ static char print_alua_state(int);
>  static void alua_rtpg_work(struct work_struct *work);
>  static void alua_stpg_work(struct work_struct *work);
>  static void alua_qdata_work(struct work_struct *work);
> -static void alua_check(struct scsi_device *sdev);
> +static void alua_check(struct scsi_device *sdev, bool force);
>  
>  static void release_port_group(struct kref *kref)
>  {
> @@ -423,7 +423,7 @@ static char print_alua_state(int state)
>  }
>  
>  static int alua_check_sense(struct scsi_device *sdev,
> -			    struct scsi_sense_hdr *sense_hdr)
> +			     struct scsi_sense_hdr *sense_hdr)
>  {
>  	switch (sense_hdr->sense_key) {
>  	case NOT_READY:
> @@ -432,36 +432,34 @@ static int alua_check_sense(struct scsi_device *sdev,
>  			 * LUN Not Accessible - ALUA state transition
>  			 * Kickoff worker to update internal state.
>  			 */
> -			alua_check(sdev);
> -			return ADD_TO_MLQUEUE;
> +			alua_check(sdev, false);
> +			return NEEDS_RETRY;
>  		}
>  		break;
>  	case UNIT_ATTENTION:
> -		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
> -			/*
> -			 * Power On, Reset, or Bus Device Reset, just retry.
> -			 */
> -			return ADD_TO_MLQUEUE;
> -		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
> -			/*
> -			 * Device internal reset
> -			 */
> -			return ADD_TO_MLQUEUE;
> -		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
> +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
>  			/*
> -			 * Mode Parameter Changed
> +			 * Power On, Reset, or Bus Device Reset.
> +			 * Might have obscured a state transition,
> +			 * so schedule a recheck.
>  			 */
> +			alua_check(sdev, true);
>  			return ADD_TO_MLQUEUE;
> -		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06)
> +		}
> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
>  			/*
>  			 * ALUA state changed
>  			 */
> +			alua_check(sdev, true);
>  			return ADD_TO_MLQUEUE;
> -		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07)
> +		}
> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
>  			/*
>  			 * Implicit ALUA state transition failed
>  			 */
> +			alua_check(sdev, true);
>  			return ADD_TO_MLQUEUE;
> +		}
>  		break;
>  	}
>  
> @@ -811,7 +809,7 @@ static void alua_qdata_work(struct work_struct *work)
>  
>  static void alua_rtpg_queue(struct alua_port_group *pg,
>  			    struct scsi_device *sdev,
> -			    struct alua_queue_data *qdata)
> +			    struct alua_queue_data *qdata, bool force)
>  {
>  	int start_queue = 0;
>  	unsigned long flags;
> @@ -832,7 +830,9 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
>  		pg->rtpg_sdev = sdev;
>  		scsi_device_get(sdev);
>  		start_queue = 1;
> -	}
> +	} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force)
> +		start_queue = 1;
> +
>  	spin_unlock_irqrestore(&pg->rtpg_lock, flags);
>  
>  	if (start_queue)
> @@ -873,7 +873,7 @@ static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
>  	mutex_unlock(&h->init_mutex);
>  	if (pg) {
>  		pg->expiry = 0;
> -		alua_rtpg_queue(pg, sdev, NULL);
> +		alua_rtpg_queue(pg, sdev, NULL, true);
>  		kref_put(&pg->kref, release_port_group);
>  	}
>  	return error;
> @@ -982,7 +982,7 @@ static int alua_activate(struct scsi_device *sdev,
>  		pg->flags |= ALUA_OPTIMIZE_STPG;
>  		spin_unlock_irqrestore(&pg->rtpg_lock, flags);
>  	}
> -	alua_rtpg_queue(pg, sdev, qdata);
> +	alua_rtpg_queue(pg, sdev, qdata, true);
>  	kref_put(&pg->kref, release_port_group);
>  out:
>  	if (fn)
> @@ -996,7 +996,7 @@ out:
>   *
>   * Check the device status
>   */
> -static void alua_check(struct scsi_device *sdev)
> +static void alua_check(struct scsi_device *sdev, bool force)
>  {
>  	struct alua_dh_data *h = sdev->handler_data;
>  	struct alua_port_group *pg;
> @@ -1009,7 +1009,7 @@ static void alua_check(struct scsi_device *sdev)
>  	}
>  	kref_get(&pg->kref);
>  	rcu_read_unlock();
> -	alua_rtpg_queue(pg, sdev, NULL);
> +	alua_rtpg_queue(pg, sdev, NULL, force);
>  	kref_put(&pg->kref, release_port_group);
>  	rcu_read_unlock();
>  }

I think this change goes along with the previous patch in terms of the
design, so we'll have to see how that turns out.  Fundamentally, though,
this changes the handling for LUN Not Accessible - ALUA state transition
to be NEEDS_RETRY instead of ADD_TO_MLQUEUE, so what is the effect of that?

Reviewed-by: Ewan D. Milne <emilne@redhat.com>



--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Reinecke Sept. 23, 2015, 1:01 p.m. UTC | #3
On 09/22/2015 09:57 PM, Ewan Milne wrote:
> On Thu, 2015-08-27 at 14:41 +0200, Hannes Reinecke wrote:
>> When we receive a unit attention code of 'ALUA state changed'
>> we should recheck the state, as it might be due to an implicit
>> ALUA state transition.
>> At the same time a workqueue item might already be queued, which
>> should be started immediately to avoid any delays.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>>  drivers/scsi/device_handler/scsi_dh_alua.c | 48 +++++++++++++++---------------
>>  1 file changed, 24 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
>> index 85fd597..8717fd3 100644
>> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
>> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
>> @@ -118,7 +118,7 @@ static char print_alua_state(int);
>>  static void alua_rtpg_work(struct work_struct *work);
>>  static void alua_stpg_work(struct work_struct *work);
>>  static void alua_qdata_work(struct work_struct *work);
>> -static void alua_check(struct scsi_device *sdev);
>> +static void alua_check(struct scsi_device *sdev, bool force);
>>  
>>  static void release_port_group(struct kref *kref)
>>  {
>> @@ -423,7 +423,7 @@ static char print_alua_state(int state)
>>  }
>>  
>>  static int alua_check_sense(struct scsi_device *sdev,
>> -			    struct scsi_sense_hdr *sense_hdr)
>> +			     struct scsi_sense_hdr *sense_hdr)
>>  {
>>  	switch (sense_hdr->sense_key) {
>>  	case NOT_READY:
>> @@ -432,36 +432,34 @@ static int alua_check_sense(struct scsi_device *sdev,
>>  			 * LUN Not Accessible - ALUA state transition
>>  			 * Kickoff worker to update internal state.
>>  			 */
>> -			alua_check(sdev);
>> -			return ADD_TO_MLQUEUE;
>> +			alua_check(sdev, false);
>> +			return NEEDS_RETRY;
>>  		}
>>  		break;
>>  	case UNIT_ATTENTION:
>> -		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
>> -			/*
>> -			 * Power On, Reset, or Bus Device Reset, just retry.
>> -			 */
>> -			return ADD_TO_MLQUEUE;
>> -		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
>> -			/*
>> -			 * Device internal reset
>> -			 */
>> -			return ADD_TO_MLQUEUE;
>> -		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
>> +		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
>>  			/*
>> -			 * Mode Parameter Changed
>> +			 * Power On, Reset, or Bus Device Reset.
>> +			 * Might have obscured a state transition,
>> +			 * so schedule a recheck.
>>  			 */
>> +			alua_check(sdev, true);
>>  			return ADD_TO_MLQUEUE;
>> -		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06)
>> +		}
>> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
>>  			/*
>>  			 * ALUA state changed
>>  			 */
>> +			alua_check(sdev, true);
>>  			return ADD_TO_MLQUEUE;
>> -		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07)
>> +		}
>> +		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
>>  			/*
>>  			 * Implicit ALUA state transition failed
>>  			 */
>> +			alua_check(sdev, true);
>>  			return ADD_TO_MLQUEUE;
>> +		}
>>  		break;
>>  	}
>>  
>> @@ -811,7 +809,7 @@ static void alua_qdata_work(struct work_struct *work)
>>  
>>  static void alua_rtpg_queue(struct alua_port_group *pg,
>>  			    struct scsi_device *sdev,
>> -			    struct alua_queue_data *qdata)
>> +			    struct alua_queue_data *qdata, bool force)
>>  {
>>  	int start_queue = 0;
>>  	unsigned long flags;
>> @@ -832,7 +830,9 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
>>  		pg->rtpg_sdev = sdev;
>>  		scsi_device_get(sdev);
>>  		start_queue = 1;
>> -	}
>> +	} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force)
>> +		start_queue = 1;
>> +
>>  	spin_unlock_irqrestore(&pg->rtpg_lock, flags);
>>  
>>  	if (start_queue)
>> @@ -873,7 +873,7 @@ static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
>>  	mutex_unlock(&h->init_mutex);
>>  	if (pg) {
>>  		pg->expiry = 0;
>> -		alua_rtpg_queue(pg, sdev, NULL);
>> +		alua_rtpg_queue(pg, sdev, NULL, true);
>>  		kref_put(&pg->kref, release_port_group);
>>  	}
>>  	return error;
>> @@ -982,7 +982,7 @@ static int alua_activate(struct scsi_device *sdev,
>>  		pg->flags |= ALUA_OPTIMIZE_STPG;
>>  		spin_unlock_irqrestore(&pg->rtpg_lock, flags);
>>  	}
>> -	alua_rtpg_queue(pg, sdev, qdata);
>> +	alua_rtpg_queue(pg, sdev, qdata, true);
>>  	kref_put(&pg->kref, release_port_group);
>>  out:
>>  	if (fn)
>> @@ -996,7 +996,7 @@ out:
>>   *
>>   * Check the device status
>>   */
>> -static void alua_check(struct scsi_device *sdev)
>> +static void alua_check(struct scsi_device *sdev, bool force)
>>  {
>>  	struct alua_dh_data *h = sdev->handler_data;
>>  	struct alua_port_group *pg;
>> @@ -1009,7 +1009,7 @@ static void alua_check(struct scsi_device *sdev)
>>  	}
>>  	kref_get(&pg->kref);
>>  	rcu_read_unlock();
>> -	alua_rtpg_queue(pg, sdev, NULL);
>> +	alua_rtpg_queue(pg, sdev, NULL, force);
>>  	kref_put(&pg->kref, release_port_group);
>>  	rcu_read_unlock();
>>  }
> 
> I think this change goes along with the previous patch in terms of the
> design, so we'll have to see how that turns out.  Fundamentally, though,
> this changes the handling for LUN Not Accessible - ALUA state transition
> to be NEEDS_RETRY instead of ADD_TO_MLQUEUE, so what is the effect of that?
> 
ADD_TO_MLQUEUE will always be retried, NEEDS_RETRY is bounded by the
number of retries.
Originally we had to return ADD_TO_MLQUEUE so to be sure to retry until
the transition is done, causing quite some retries.
As now we're polling the device from a workqueue we can terminate
the number of retries by returning NEEDS_RETRY, knowing that the polling
(and/or sense code evaluation) will tell us when the transition is finished.

Cheers,

Hannes
diff mbox

Patch

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 85fd597..8717fd3 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -118,7 +118,7 @@  static char print_alua_state(int);
 static void alua_rtpg_work(struct work_struct *work);
 static void alua_stpg_work(struct work_struct *work);
 static void alua_qdata_work(struct work_struct *work);
-static void alua_check(struct scsi_device *sdev);
+static void alua_check(struct scsi_device *sdev, bool force);
 
 static void release_port_group(struct kref *kref)
 {
@@ -423,7 +423,7 @@  static char print_alua_state(int state)
 }
 
 static int alua_check_sense(struct scsi_device *sdev,
-			    struct scsi_sense_hdr *sense_hdr)
+			     struct scsi_sense_hdr *sense_hdr)
 {
 	switch (sense_hdr->sense_key) {
 	case NOT_READY:
@@ -432,36 +432,34 @@  static int alua_check_sense(struct scsi_device *sdev,
 			 * LUN Not Accessible - ALUA state transition
 			 * Kickoff worker to update internal state.
 			 */
-			alua_check(sdev);
-			return ADD_TO_MLQUEUE;
+			alua_check(sdev, false);
+			return NEEDS_RETRY;
 		}
 		break;
 	case UNIT_ATTENTION:
-		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00)
-			/*
-			 * Power On, Reset, or Bus Device Reset, just retry.
-			 */
-			return ADD_TO_MLQUEUE;
-		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
-			/*
-			 * Device internal reset
-			 */
-			return ADD_TO_MLQUEUE;
-		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
+		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
 			/*
-			 * Mode Parameter Changed
+			 * Power On, Reset, or Bus Device Reset.
+			 * Might have obscured a state transition,
+			 * so schedule a recheck.
 			 */
+			alua_check(sdev, true);
 			return ADD_TO_MLQUEUE;
-		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06)
+		}
+		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
 			/*
 			 * ALUA state changed
 			 */
+			alua_check(sdev, true);
 			return ADD_TO_MLQUEUE;
-		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07)
+		}
+		if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
 			/*
 			 * Implicit ALUA state transition failed
 			 */
+			alua_check(sdev, true);
 			return ADD_TO_MLQUEUE;
+		}
 		break;
 	}
 
@@ -811,7 +809,7 @@  static void alua_qdata_work(struct work_struct *work)
 
 static void alua_rtpg_queue(struct alua_port_group *pg,
 			    struct scsi_device *sdev,
-			    struct alua_queue_data *qdata)
+			    struct alua_queue_data *qdata, bool force)
 {
 	int start_queue = 0;
 	unsigned long flags;
@@ -832,7 +830,9 @@  static void alua_rtpg_queue(struct alua_port_group *pg,
 		pg->rtpg_sdev = sdev;
 		scsi_device_get(sdev);
 		start_queue = 1;
-	}
+	} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force)
+		start_queue = 1;
+
 	spin_unlock_irqrestore(&pg->rtpg_lock, flags);
 
 	if (start_queue)
@@ -873,7 +873,7 @@  static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
 	mutex_unlock(&h->init_mutex);
 	if (pg) {
 		pg->expiry = 0;
-		alua_rtpg_queue(pg, sdev, NULL);
+		alua_rtpg_queue(pg, sdev, NULL, true);
 		kref_put(&pg->kref, release_port_group);
 	}
 	return error;
@@ -982,7 +982,7 @@  static int alua_activate(struct scsi_device *sdev,
 		pg->flags |= ALUA_OPTIMIZE_STPG;
 		spin_unlock_irqrestore(&pg->rtpg_lock, flags);
 	}
-	alua_rtpg_queue(pg, sdev, qdata);
+	alua_rtpg_queue(pg, sdev, qdata, true);
 	kref_put(&pg->kref, release_port_group);
 out:
 	if (fn)
@@ -996,7 +996,7 @@  out:
  *
  * Check the device status
  */
-static void alua_check(struct scsi_device *sdev)
+static void alua_check(struct scsi_device *sdev, bool force)
 {
 	struct alua_dh_data *h = sdev->handler_data;
 	struct alua_port_group *pg;
@@ -1009,7 +1009,7 @@  static void alua_check(struct scsi_device *sdev)
 	}
 	kref_get(&pg->kref);
 	rcu_read_unlock();
-	alua_rtpg_queue(pg, sdev, NULL);
+	alua_rtpg_queue(pg, sdev, NULL, force);
 	kref_put(&pg->kref, release_port_group);
 	rcu_read_unlock();
 }