diff mbox

opensm: Fix crash during handover

Message ID 1393931268-15316-1-git-send-email-alexne@mellanox.com (mailing list archive)
State Accepted
Delegated to: Hal Rosenstock
Headers show

Commit Message

Alex Netes March 4, 2014, 11:07 a.m. UTC
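The crash occurs when another MASTER SM with lower priority sends
HANDOVER to our SM before our SM *starts* polling it.

In sm_state_mgr_start_polling() there is no validation of whether
p_polling_sm is valid, so it may be used when it is not.

Fix this by tracking the SM being polled by its GUID (polling_sm_guid)
instead of keeping a pointer to the remote SM object (p_polling_sm).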

Signed-off-by: Alex Netes <alexne@mellanox.com>
---
 include/opensm/osm_sm.h   |    2 +-
 opensm/osm_drop_mgr.c     |    6 +++---
 opensm/osm_sm_state_mgr.c |   12 ++++++------
 opensm/osm_sminfo_rcv.c   |    2 +-
 opensm/osm_state_mgr.c    |    2 +-
 5 files changed, 12 insertions(+), 12 deletions(-)

Comments

Hal Rosenstock March 4, 2014, 1:28 p.m. UTC | #1
On 3/4/2014 6:07 AM, Alex Netes wrote:
> Another MASTER SM with lower priority sends HANDOVER to our SM,
> before our SM *starts* polling it.
> 
> In sm_state_mgr_start_polling() there is no validation whether
> p_polling_sm is valid.
> 
> Signed-off-by: Alex Netes <alexne@mellanox.com>

Thanks. Applied.

-- Hal
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/opensm/osm_sm.h b/include/opensm/osm_sm.h
index e48c549..94d1831 100644
--- a/include/opensm/osm_sm.h
+++ b/include/opensm/osm_sm.h
@@ -116,7 +116,7 @@  typedef struct osm_sm {
 	unsigned master_sm_found;
 	uint32_t retry_number;
 	ib_net64_t master_sm_guid;
-	osm_remote_sm_t *p_polling_sm;
+	ib_net64_t polling_sm_guid;
 	osm_subn_t *p_subn;
 	osm_db_t *p_db;
 	osm_vendor_t *p_vendor;
diff --git a/opensm/osm_drop_mgr.c b/opensm/osm_drop_mgr.c
index ff6a81b..c1cdc0d 100644
--- a/opensm/osm_drop_mgr.c
+++ b/opensm/osm_drop_mgr.c
@@ -257,9 +257,9 @@  static void drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
 			"Cleaned SM for port guid 0x%016" PRIx64 "\n",
 			cl_ntoh64(port_guid));
-		/* clean up the polling_sm pointer */
-		if (sm->p_polling_sm == p_sm)
-			sm->p_polling_sm = NULL;
+		/* clean up the polling_sm_guid */
+		if (sm->polling_sm_guid == p_sm->smi.guid)
+			sm->polling_sm_guid = 0;
 		free(p_sm);
 	}
 
diff --git a/opensm/osm_sm_state_mgr.c b/opensm/osm_sm_state_mgr.c
index 0660fb9..e5a11da 100644
--- a/opensm/osm_sm_state_mgr.c
+++ b/opensm/osm_sm_state_mgr.c
@@ -97,11 +97,11 @@  static boolean_t sm_state_mgr_send_master_sm_info_req(osm_sm_t * sm, uint8_t sm_
 	} else {
 		/*
 		 * We are not in STANDBY - this means we are in MASTER state -
-		 * so we need to poll the SM that is saved in p_polling_sm
+		 * so we need to poll the SM that is saved in polling_sm_guid
 		 * under sm.
 		 * Send a query of SubnGet(SMInfo) to that SM.
 		 */
-		guid = sm->p_polling_sm->smi.guid;
+		guid = sm->polling_sm_guid;
 	}
 
 	/* Verify that SM is not polling itself */
@@ -198,7 +198,7 @@  void osm_sm_state_mgr_polling_callback(IN void *context)
 	 * If we are not in one of these cases - don't need to restart the poller.
 	 */
 	if (!((sm_state == IB_SMINFO_STATE_MASTER &&
-	       sm->p_polling_sm != NULL) ||
+	       sm->polling_sm_guid != 0) ||
 	      sm_state == IB_SMINFO_STATE_STANDBY)) {
 		CL_PLOCK_RELEASE(sm->p_lock);
 		goto Exit;
@@ -426,7 +426,7 @@  ib_api_status_t osm_sm_state_mgr_process(osm_sm_t * sm,
 			 * We want to force a heavy sweep - hopefully this
 			 * occurred because the remote sm died, and we'll find
 			 * this out and configure the subnet after a heavy sweep.
-			 * We also want to clear the p_polling_sm object - since
+			 * We also want to clear the polling_sm_guid - since
 			 * we are done polling on that remote sm - we are
 			 * sweeping again.
 			 */
@@ -438,7 +438,7 @@  ib_api_status_t osm_sm_state_mgr_process(osm_sm_t * sm,
 			 * change, or we are in idle state - since we
 			 * recognized a master SM before - so we want to make a
 			 * heavy sweep and reconfigure the new subnet.
-			 * We also want to clear the p_polling_sm object - since
+			 * We also want to clear the polling_sm_guid - since
 			 * we are done polling on that remote sm - we got a
 			 * handover from it.
 			 */
@@ -449,7 +449,7 @@  ib_api_status_t osm_sm_state_mgr_process(osm_sm_t * sm,
 			 * SM may have configure/done on the fabric.
 			 */
 			sm->p_subn->set_client_rereg_on_sweep = TRUE;
-			sm->p_polling_sm = NULL;
+			sm->polling_sm_guid = 0;
 			sm->p_subn->force_heavy_sweep = TRUE;
 			osm_sm_signal(sm, OSM_SIGNAL_SWEEP);
 			break;
diff --git a/opensm/osm_sminfo_rcv.c b/opensm/osm_sminfo_rcv.c
index 66ad410..9f62f9f 100644
--- a/opensm/osm_sminfo_rcv.c
+++ b/opensm/osm_sminfo_rcv.c
@@ -392,7 +392,7 @@  static void smi_rcv_process_get_sm(IN osm_sm_t * sm,
 			 * as it might not get it and we don't want to wait for a HANDOVER
 			 * forever.
 			 */
-			if (sm->p_polling_sm) {
+			if (sm->polling_sm_guid) {
 				if (smi_rcv_remote_sm_is_higher(sm, p_smi))
 					sm->p_subn->force_heavy_sweep = TRUE;
 				else
diff --git a/opensm/osm_state_mgr.c b/opensm/osm_state_mgr.c
index f9b20e2..c4f4978 100644
--- a/opensm/osm_state_mgr.c
+++ b/opensm/osm_state_mgr.c
@@ -1386,7 +1386,7 @@  repeat_discovery:
 				 * need to wait for that SM to relinquish control
 				 * of its portion of the subnet. C14-60.2.1.
 				 * Also - need to start polling on that SM. */
-				sm->p_polling_sm = p_remote_sm;
+				sm->polling_sm_guid = p_remote_sm->smi.guid;
 				osm_sm_state_mgr_process(sm,
 							 OSM_SM_SIGNAL_WAIT_FOR_HANDOVER);
 				return;