[1/1] target: replace ALUA transition wait/flush with mutex
diff mbox

Message ID 1490764764-6633-1-git-send-email-mchristi@redhat.com
State New, archived
Headers show

Commit Message

Michael Christie March 29, 2017, 5:19 a.m. UTC
Multiple threads could be writing to alua_access_state at
the same time, or there could be multiple STPGs in flight
(different initiators sending them or one initiator sending
them to different ports), or a combo of both and the
core_alua_do_transition_tg_pt calls will race with each other.

Because from the last patches we no longer delay running
core_alua_do_transition_tg_pt_work, there does not seem to be
any point in running that in a workqueue. And, we always
wait for it to complete one way or another, so we can sleep
in this code path. So, this patch made over target-pending just adds a
mutex and does the work core_alua_do_transition_tg_pt_work was doing in
core_alua_do_transition_tg_pt.

There is also no need to use an atomic for the
tg_pt_gp_alua_access_state. In core_alua_do_transition_tg_pt we will
test and set it under the transition mutex. And, it is a int/32 bits
so in the other places where it is read, we will never see it partially
updated.

Signed-off-by: Mike Christie <mchristi@redhat.com>
---
 drivers/target/target_core_alua.c     | 136 ++++++++++++----------------------
 drivers/target/target_core_configfs.c |   2 +-
 include/target/target_core_base.h     |   8 +-
 3 files changed, 51 insertions(+), 95 deletions(-)

Comments

Nicholas A. Bellinger March 31, 2017, 6:18 a.m. UTC | #1
On Wed, 2017-03-29 at 00:19 -0500, Mike Christie wrote:
> Multiple threads could be writing to alua_access_state at
> the same time, or there could be multiple STPGs in flight
> (different initiators sending them or one initiator sending
> them to different ports), or a combo of both and the
> core_alua_do_transition_tg_pt calls will race with each other.
> 
> Because from the last patches we no longer delay running
> core_alua_do_transition_tg_pt_work, there does not seem to be
> any point in running that in a workqueue. And, we always
> wait for it to complete one way or another, so we can sleep
> in this code path. So, this patch made over target-pending just adds a
> mutex and does the work core_alua_do_transition_tg_pt_work was doing in
> core_alua_do_transition_tg_pt.
> 
> There is also no need to use an atomic for the
> tg_pt_gp_alua_access_state. In core_alua_do_transition_tg_pt we will
> test and set it under the transition mutex. And, it is a int/32 bits
> so in the other places where it is read, we will never see it partially
> updated.
> 
> Signed-off-by: Mike Christie <mchristi@redhat.com>
> ---
>  drivers/target/target_core_alua.c     | 136 ++++++++++++----------------------
>  drivers/target/target_core_configfs.c |   2 +-
>  include/target/target_core_base.h     |   8 +-
>  3 files changed, 51 insertions(+), 95 deletions(-)

Applied to target-pending/master.  Thanks MNC.

Btw, I changed the subject to be a little more descriptive about it
being a bug-fix:

  target: Fix ALUA transition state race between multiple initiators

Also, adding a HARE CC', and this was his original domain..

Dr. Hannes, any objections..?

--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index fd7c16a..fc4a9c3 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -197,8 +197,7 @@  target_emulate_report_target_port_groups(struct se_cmd *cmd)
 		/*
 		 * Set the ASYMMETRIC ACCESS State
 		 */
-		buf[off++] |= (atomic_read(
-			&tg_pt_gp->tg_pt_gp_alua_access_state) & 0xff);
+		buf[off++] |= tg_pt_gp->tg_pt_gp_alua_access_state & 0xff;
 		/*
 		 * Set supported ASYMMETRIC ACCESS State bits
 		 */
@@ -710,7 +709,7 @@  target_alua_state_check(struct se_cmd *cmd)
 
 	spin_lock(&lun->lun_tg_pt_gp_lock);
 	tg_pt_gp = lun->lun_tg_pt_gp;
-	out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
+	out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state;
 	nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs;
 
 	// XXX: keeps using tg_pt_gp witout reference after unlock
@@ -911,7 +910,7 @@  static int core_alua_write_tpg_metadata(
 }
 
 /*
- * Called with tg_pt_gp->tg_pt_gp_md_mutex held
+ * Called with tg_pt_gp->tg_pt_gp_transition_mutex held
  */
 static int core_alua_update_tpg_primary_metadata(
 	struct t10_alua_tg_pt_gp *tg_pt_gp)
@@ -934,7 +933,7 @@  static int core_alua_update_tpg_primary_metadata(
 			"alua_access_state=0x%02x\n"
 			"alua_access_status=0x%02x\n",
 			tg_pt_gp->tg_pt_gp_id,
-			tg_pt_gp->tg_pt_gp_alua_pending_state,
+			tg_pt_gp->tg_pt_gp_alua_access_state,
 			tg_pt_gp->tg_pt_gp_alua_access_status);
 
 	snprintf(path, ALUA_METADATA_PATH_LEN,
@@ -1013,93 +1012,41 @@  static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
 	spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
 }
 
-static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
-{
-	struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
-		struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
-	struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-	bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
-			 ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
-
-	/*
-	 * Update the ALUA metadata buf that has been allocated in
-	 * core_alua_do_port_transition(), this metadata will be written
-	 * to struct file.
-	 *
-	 * Note that there is the case where we do not want to update the
-	 * metadata when the saved metadata is being parsed in userspace
-	 * when setting the existing port access state and access status.
-	 *
-	 * Also note that the failure to write out the ALUA metadata to
-	 * struct file does NOT affect the actual ALUA transition.
-	 */
-	if (tg_pt_gp->tg_pt_gp_write_metadata) {
-		mutex_lock(&tg_pt_gp->tg_pt_gp_md_mutex);
-		core_alua_update_tpg_primary_metadata(tg_pt_gp);
-		mutex_unlock(&tg_pt_gp->tg_pt_gp_md_mutex);
-	}
-	/*
-	 * Set the current primary ALUA access state to the requested new state
-	 */
-	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-		   tg_pt_gp->tg_pt_gp_alua_pending_state);
-
-	pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
-		" from primary access state %s to %s\n", (explicit) ? "explicit" :
-		"implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
-		tg_pt_gp->tg_pt_gp_id,
-		core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state),
-		core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
-
-	core_alua_queue_state_change_ua(tg_pt_gp);
-
-	spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-	atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
-	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
-
-	if (tg_pt_gp->tg_pt_gp_transition_complete)
-		complete(tg_pt_gp->tg_pt_gp_transition_complete);
-}
-
 static int core_alua_do_transition_tg_pt(
 	struct t10_alua_tg_pt_gp *tg_pt_gp,
 	int new_state,
 	int explicit)
 {
-	struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-	DECLARE_COMPLETION_ONSTACK(wait);
+	int prev_state;
 
+	mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 	/* Nothing to be done here */
-	if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state)
+	if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) {
+		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 		return 0;
+	}
 
-	if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION)
+	if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) {
+		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 		return -EAGAIN;
-
-	/*
-	 * Flush any pending transitions
-	 */
-	if (!explicit)
-		flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
+	}
 
 	/*
 	 * Save the old primary ALUA access state, and set the current state
 	 * to ALUA_ACCESS_STATE_TRANSITION.
 	 */
-	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-			ALUA_ACCESS_STATE_TRANSITION);
+	prev_state = tg_pt_gp->tg_pt_gp_alua_access_state;
+	tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION;
 	tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
 				ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG :
 				ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA;
 
 	core_alua_queue_state_change_ua(tg_pt_gp);
 
-	if (new_state == ALUA_ACCESS_STATE_TRANSITION)
+	if (new_state == ALUA_ACCESS_STATE_TRANSITION) {
+		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 		return 0;
-
-	tg_pt_gp->tg_pt_gp_alua_previous_state =
-		atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
-	tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
+	}
 
 	/*
 	 * Check for the optional ALUA primary state transition delay
@@ -1108,19 +1055,36 @@  static int core_alua_do_transition_tg_pt(
 		msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs);
 
 	/*
-	 * Take a reference for workqueue item
+	 * Set the current primary ALUA access state to the requested new state
 	 */
-	spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-	atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
-	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
+	tg_pt_gp->tg_pt_gp_alua_access_state = new_state;
 
-	schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
-	if (explicit) {
-		tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-		wait_for_completion(&wait);
-		tg_pt_gp->tg_pt_gp_transition_complete = NULL;
+	/*
+	 * Update the ALUA metadata buf that has been allocated in
+	 * core_alua_do_port_transition(), this metadata will be written
+	 * to struct file.
+	 *
+	 * Note that there is the case where we do not want to update the
+	 * metadata when the saved metadata is being parsed in userspace
+	 * when setting the existing port access state and access status.
+	 *
+	 * Also note that the failure to write out the ALUA metadata to
+	 * struct file does NOT affect the actual ALUA transition.
+	 */
+	if (tg_pt_gp->tg_pt_gp_write_metadata) {
+		core_alua_update_tpg_primary_metadata(tg_pt_gp);
 	}
 
+	pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
+		" from primary access state %s to %s\n", (explicit) ? "explicit" :
+		"implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
+		tg_pt_gp->tg_pt_gp_id,
+		core_alua_dump_state(prev_state),
+		core_alua_dump_state(new_state));
+
+	core_alua_queue_state_change_ua(tg_pt_gp);
+
+	mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 	return 0;
 }
 
@@ -1685,14 +1649,12 @@  struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
 	}
 	INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list);
 	INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list);
-	mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
+	mutex_init(&tg_pt_gp->tg_pt_gp_transition_mutex);
 	spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
 	atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
-	INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
-		  core_alua_do_transition_tg_pt_work);
 	tg_pt_gp->tg_pt_gp_dev = dev;
-	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-		ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
+	tg_pt_gp->tg_pt_gp_alua_access_state =
+			ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED;
 	/*
 	 * Enable both explicit and implicit ALUA support by default
 	 */
@@ -1797,8 +1759,6 @@  void core_alua_free_tg_pt_gp(
 	dev->t10_alua.alua_tg_pt_gps_counter--;
 	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
-	flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
-
 	/*
 	 * Allow a struct t10_alua_tg_pt_gp_member * referenced by
 	 * core_alua_get_tg_pt_gp_by_name() in
@@ -1938,8 +1898,8 @@  ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page)
 			"Primary Access Status: %s\nTG Port Secondary Access"
 			" State: %s\nTG Port Secondary Access Status: %s\n",
 			config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id,
-			core_alua_dump_state(atomic_read(
-					&tg_pt_gp->tg_pt_gp_alua_access_state)),
+			core_alua_dump_state(
+				tg_pt_gp->tg_pt_gp_alua_access_state),
 			core_alua_dump_status(
 				tg_pt_gp->tg_pt_gp_alua_access_status),
 			atomic_read(&lun->lun_tg_pt_secondary_offline) ?
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 38b5025..70657fd 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -2392,7 +2392,7 @@  static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item,
 		char *page)
 {
 	return sprintf(page, "%d\n",
-		atomic_read(&to_tg_pt_gp(item)->tg_pt_gp_alua_access_state));
+		       to_tg_pt_gp(item)->tg_pt_gp_alua_access_state);
 }
 
 static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item,
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 4b784b6..fe0bb67 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -279,8 +279,6 @@  struct t10_alua_tg_pt_gp {
 	u16	tg_pt_gp_id;
 	int	tg_pt_gp_valid_id;
 	int	tg_pt_gp_alua_supported_states;
-	int	tg_pt_gp_alua_pending_state;
-	int	tg_pt_gp_alua_previous_state;
 	int	tg_pt_gp_alua_access_status;
 	int	tg_pt_gp_alua_access_type;
 	int	tg_pt_gp_nonop_delay_msecs;
@@ -289,18 +287,16 @@  struct t10_alua_tg_pt_gp {
 	int	tg_pt_gp_pref;
 	int	tg_pt_gp_write_metadata;
 	u32	tg_pt_gp_members;
-	atomic_t tg_pt_gp_alua_access_state;
+	int	tg_pt_gp_alua_access_state;
 	atomic_t tg_pt_gp_ref_cnt;
 	spinlock_t tg_pt_gp_lock;
-	struct mutex tg_pt_gp_md_mutex;
+	struct mutex tg_pt_gp_transition_mutex;
 	struct se_device *tg_pt_gp_dev;
 	struct config_group tg_pt_gp_group;
 	struct list_head tg_pt_gp_list;
 	struct list_head tg_pt_gp_lun_list;
 	struct se_lun *tg_pt_gp_alua_lun;
 	struct se_node_acl *tg_pt_gp_alua_nacl;
-	struct work_struct tg_pt_gp_transition_work;
-	struct completion *tg_pt_gp_transition_complete;
 };
 
 struct t10_vpd {