diff mbox

[15/36] scsi_dh_alua: Make stpg synchronous

Message ID 1443523658-87622-16-git-send-email-hare@suse.de (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Hannes Reinecke Sept. 29, 2015, 10:47 a.m. UTC
The 'activate_complete' function needs to be executed after
stpg has finished, so we can as well execute stpg synchronously
and call the function directly.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 133 +++++++++++------------------
 1 file changed, 50 insertions(+), 83 deletions(-)

Comments

Bart Van Assche Oct. 2, 2015, 5:33 p.m. UTC | #1
On 09/29/2015 03:47 AM, Hannes Reinecke wrote:
> The 'activate_complete' function needs to be executed after
> stpg has finished, so we can as well execute stpg synchronously
> and call the function directly.

Hello Hannes,

Another patch in this series moves invocation of the STPG commands to 
the context of a workqueue. The setup on which I have been testing this 
patch series consists of an initiator system with four ports and a 
target system with eight ports and 100 LUNs. As a result, 4*8*100=3200 
/dev/sd* devices are created on the initiator system and monitored by 
the scsi_dh_alua handler. How many kernel threads will be needed to 
monitor all these paths concurrently and how much memory will be 
required for all these kernel threads ? What if the number of LUNs would 
be even higher, e.g. 1000 ? Sorry but because of scalability concerns my 
preference is that the RTPG and STPG commands are invoked asynchronously.

Thanks,

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Reinecke Oct. 7, 2015, 8:48 p.m. UTC | #2
On 10/02/2015 07:33 PM, Bart Van Assche wrote:
> On 09/29/2015 03:47 AM, Hannes Reinecke wrote:
>> The 'activate_complete' function needs to be executed after
>> stpg has finished, so we can as well execute stpg synchronously
>> and call the function directly.
>
> Hello Hannes,
>
> Another patch in this series moves invocation of the STPG commands to
> the context of a workqueue. The setup on which I have been testing this
> patch series consists of an initiator system with four ports and a
> target system with eight ports and 100 LUNs. As a result, 4*8*100=3200
> /dev/sd* devices are created on the initiator system and monitored by
> the scsi_dh_alua handler. How many kernel threads will be needed to
> monitor all these paths concurrently and how much memory will be
> required for all these kernel threads ? What if the number of LUNs would
> be even higher, e.g. 1000 ? Sorry but because of scalability concerns my
> preference is that the RTPG and STPG commands are invoked asynchronously.
>
The paths are not 'monitored' in the normal sense, ie the workqueue is 
not always active. The workqueue items is active only if a state change 
needs to be handled.
I doubt that the overall resource usage will increase here, as 
originally we would need to execute '->activate' for every path.
With the new design we will be starting a workqueue item _per port group 
and LUN_, which will run until the new state could be successfully 
retrieved.

I have considered using an asynchronous implementation, but sadly 
scsi_execute() and friends doesn't have an asynchronous version.
Also we would need to handle quite a substantial bulk of computation
from within an interrupt handler, which would make the operation
really tricky.

We should be doing some analysis here to figure out if there really is 
an increased memory usage and if this would turn out to be an issue.

Cheers,

Hannes
diff mbox

Patch

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 59fe3e9..7c3789b 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -170,76 +170,28 @@  done:
 }
 
 /*
- * stpg_endio - Evaluate SET TARGET GROUP STATES
- * @sdev: the device to be evaluated
- * @state: the new target group state
- *
- * Evaluate a SET TARGET GROUP STATES command response.
- */
-static void stpg_endio(struct request *req, int error)
-{
-	struct alua_dh_data *h = req->end_io_data;
-	struct scsi_sense_hdr sense_hdr;
-	unsigned err = SCSI_DH_OK;
-
-	if (host_byte(req->errors) != DID_OK ||
-	    msg_byte(req->errors) != COMMAND_COMPLETE) {
-		err = SCSI_DH_IO;
-		goto done;
-	}
-
-	if (scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
-				 &sense_hdr)) {
-		err = alua_check_sense(h->sdev, &sense_hdr);
-		if (err == ADD_TO_MLQUEUE) {
-			err = SCSI_DH_RETRY;
-			goto done;
-		}
-		sdev_printk(KERN_INFO, h->sdev, "%s: stpg failed\n",
-			    ALUA_DH_NAME);
-		scsi_print_sense_hdr(h->sdev, ALUA_DH_NAME, &sense_hdr);
-		err = SCSI_DH_IO;
-	} else if (error)
-		err = SCSI_DH_IO;
-
-	if (err == SCSI_DH_OK) {
-		h->state = TPGS_STATE_OPTIMIZED;
-		sdev_printk(KERN_INFO, h->sdev,
-			    "%s: port group %02x switched to state %c\n",
-			    ALUA_DH_NAME, h->group_id,
-			    print_alua_state(h->state));
-	}
-done:
-	req->end_io_data = NULL;
-	__blk_put_request(req->q, req);
-	if (h->callback_fn) {
-		h->callback_fn(h->callback_data, err);
-		h->callback_fn = h->callback_data = NULL;
-	}
-	return;
-}
-
-/*
  * submit_stpg - Issue a SET TARGET GROUP STATES command
  *
  * Currently we're only setting the current target port group state
  * to 'active/optimized' and let the array firmware figure out
  * the states of the remaining groups.
  */
-static unsigned submit_stpg(struct alua_dh_data *h)
+static unsigned submit_stpg(struct scsi_device *sdev, int group_id,
+			    unsigned char *sense)
 {
 	struct request *rq;
+	unsigned char stpg_data[8];
 	int stpg_len = 8;
-	struct scsi_device *sdev = h->sdev;
+	int err = 0;
 
 	/* Prepare the data buffer */
-	memset(h->buff, 0, stpg_len);
-	h->buff[4] = TPGS_STATE_OPTIMIZED & 0x0f;
-	put_unaligned_be16(h->group_id, &h->buff[6]);
+	memset(stpg_data, 0, stpg_len);
+	stpg_data[4] = TPGS_STATE_OPTIMIZED & 0x0f;
+	put_unaligned_be16(group_id, &stpg_data[6]);
 
-	rq = get_alua_req(sdev, h->buff, stpg_len, WRITE);
+	rq = get_alua_req(sdev, stpg_data, stpg_len, WRITE);
 	if (!rq)
-		return SCSI_DH_RES_TEMP_UNAVAIL;
+		return DRIVER_BUSY << 24;
 
 	/* Prepare the command. */
 	rq->cmd[0] = MAINTENANCE_OUT;
@@ -247,13 +199,17 @@  static unsigned submit_stpg(struct alua_dh_data *h)
 	put_unaligned_be32(stpg_len, &rq->cmd[6]);
 	rq->cmd_len = COMMAND_SIZE(MAINTENANCE_OUT);
 
-	rq->sense = h->sense;
+	rq->sense = sense;
 	memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
 	rq->sense_len = 0;
-	rq->end_io_data = h;
 
-	blk_execute_rq_nowait(rq->q, NULL, rq, 1, stpg_endio);
-	return SCSI_DH_OK;
+	blk_execute_rq(rq->q, NULL, rq, 1);
+	if (rq->errors)
+		err = rq->errors;
+
+	blk_put_request(rq);
+
+	return err;
 }
 
 /*
@@ -620,47 +576,59 @@  static int alua_rtpg(struct scsi_device *sdev, struct alua_dh_data *h, int wait_
  * response. Returns SCSI_DH_RETRY per default to trigger
  * a re-evaluation of the target group state.
  */
-static unsigned alua_stpg(struct scsi_device *sdev, struct alua_dh_data *h,
-			  activate_complete fn, void *data)
+static unsigned alua_stpg(struct scsi_device *sdev, struct alua_dh_data *h)
 {
-	int err = SCSI_DH_OK;
-	int stpg = 0;
+	int retval;
+	struct scsi_sense_hdr sense_hdr;
 
 	if (!(h->tpgs & TPGS_MODE_EXPLICIT)) {
-		/* Only implicit ALUA supported */
-		return err;
+		/* Only implicit ALUA supported, retry */
+		return SCSI_DH_RETRY;
 	}
-
 	switch (h->state) {
+	case TPGS_STATE_OPTIMIZED:
+		return SCSI_DH_OK;
 	case TPGS_STATE_NONOPTIMIZED:
-		stpg = 1;
 		if ((h->flags & ALUA_OPTIMIZE_STPG) &&
 		    !h->pref &&
 		    (h->tpgs & TPGS_MODE_IMPLICIT))
-			stpg = 0;
+			return SCSI_DH_OK;
 		break;
 	case TPGS_STATE_STANDBY:
 	case TPGS_STATE_UNAVAILABLE:
-		stpg = 1;
 		break;
 	case TPGS_STATE_OFFLINE:
-		err = SCSI_DH_IO;
+		return SCSI_DH_IO;
 		break;
 	case TPGS_STATE_TRANSITIONING:
-		err = SCSI_DH_RETRY;
 		break;
 	default:
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: stpg failed, unhandled TPGS state %d",
+			    ALUA_DH_NAME, h->state);
+		return SCSI_DH_NOSYS;
 		break;
 	}
+	/* Set state to transitioning */
+	h->state = TPGS_STATE_TRANSITIONING;
+	retval = submit_stpg(sdev, h->group_id, h->sense);
 
-	if (stpg) {
-		h->callback_fn = fn;
-		h->callback_data = data;
-		err = submit_stpg(h);
-		if (err != SCSI_DH_OK)
-			h->callback_fn = h->callback_data = NULL;
+	if (retval) {
+		if (!scsi_normalize_sense(h->sense, SCSI_SENSE_BUFFERSIZE,
+					  &sense_hdr)) {
+			sdev_printk(KERN_INFO, sdev,
+				    "%s: stpg failed, result %d",
+				    ALUA_DH_NAME, retval);
+			if (driver_byte(retval) == DRIVER_BUSY)
+				return SCSI_DH_DEV_TEMP_BUSY;
+		} else {
+			sdev_printk(KERN_INFO, h->sdev, "%s: stpg failed\n",
+				    ALUA_DH_NAME);
+			scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
+		}
 	}
-	return err;
+	/* Retry RTPG */
+	return SCSI_DH_RETRY;
 }
 
 /*
@@ -748,10 +716,9 @@  static int alua_activate(struct scsi_device *sdev,
 	if (optimize_stpg)
 		h->flags |= ALUA_OPTIMIZE_STPG;
 
-	err = alua_stpg(sdev, h, fn, data);
-
+	err = alua_stpg(sdev, h);
 out:
-	if (err != SCSI_DH_OK && fn)
+	if (fn)
 		fn(data, err);
 	return 0;
 }