diff mbox

[v2] opensm: Multicast root switch calculation

Message ID 20100127104503.GM26338@me (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Sasha Khapyorsky Jan. 27, 2010, 10:45 a.m. UTC
None
diff mbox

Patch

diff --git a/opensm/include/opensm/osm_switch.h b/opensm/include/opensm/osm_switch.h
index 205896d..cb6e5ac 100644
--- a/opensm/include/opensm/osm_switch.h
+++ b/opensm/include/opensm/osm_switch.h
@@ -109,6 +109,9 @@  typedef struct osm_switch {
 	unsigned endport_links;
 	unsigned need_update;
 	void *priv;
+	cl_map_item_t mgrp_item;
+	uint32_t num_of_mcm;
+	uint8_t is_mc_member;
 } osm_switch_t;
 /*
 * FIELDS
@@ -151,6 +154,15 @@  typedef struct osm_switch {
 *		When set indicates that switch was probably reset, so
 *		fwd tables and rest cached data should be flushed
 *
+*	mgrp_item
+*		map item linking the switch into the per-group switch map
+*
+*	num_of_mcm
+*		number of mcast member end ports attached to the switch
+*
+*	is_mc_member
+*		whether switch is a mcast member itself
+*
 * SEE ALSO
 *	Switch object
 *********/
diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c
index dce9f2b..5c9d0bc 100644
--- a/opensm/opensm/osm_mcast_mgr.c
+++ b/opensm/opensm/osm_mcast_mgr.c
@@ -157,50 +157,119 @@  static void mcast_mgr_purge_tree(osm_sm_t * sm, IN osm_mgrp_box_t * mbox)
 	OSM_LOG_EXIT(sm->p_log);
 }
 
-static float osm_mcast_mgr_compute_avg_hops(osm_sm_t * sm, cl_qlist_t * l,
-					    const osm_switch_t * p_sw)
+/**********************************************************************
+ Build the map (keyed by switch node GUID) of the switches that serve
+ the group members; num_of_mcm counts the end ports attached to each
+ switch and is_mc_member marks a switch that is a member itself.
+ **********************************************************************/
+static void mcast_mgr_build_switch_map(osm_sm_t * sm,
+				       const cl_qlist_t * port_list,
+				       cl_qmap_t * p_mcast_member_sw_tbl)
 {
-	float avg_hops = 0;
-	uint32_t hops = 0;
-	uint32_t num_ports = 0;
-	cl_list_item_t *i;
+	osm_switch_t *remote_sw;
+	cl_list_item_t *item;
+	osm_port_t *p_port;
+	ib_net64_t sw_guid;
+	osm_physp_t *p_physp_remote;
 	osm_mcast_work_obj_t *wobj;
 
 	OSM_LOG_ENTER(sm->p_log);
 
-	/*
-	   For each member of the multicast group, compute the
-	   number of hops to its base LID.
-	 */
-	for (i = cl_qlist_head(l); i != cl_qlist_end(l); i = cl_qlist_next(i)) {
-		wobj = cl_item_obj(i, wobj, list_item);
-		hops += osm_switch_get_port_least_hops(p_sw, wobj->p_port);
-		num_ports++;
+	cl_qmap_init(p_mcast_member_sw_tbl);
+	for (item = cl_qlist_head(port_list); item != cl_qlist_end(port_list);
+	     item = cl_qlist_next(item)) {
+		wobj = cl_item_obj(item, wobj, list_item);
+		p_port = wobj->p_port;
+		if (!p_port)
+			continue;
+		if (p_port->p_node->sw) {
+			/* a switch member is served by the switch itself */
+			remote_sw = p_port->p_node->sw;
+			remote_sw->is_mc_member = 1;
+		} else {
+			/* skip end ports that are not cabled to a switch */
+			p_physp_remote = osm_physp_get_remote(p_port->p_physp);
+			if (!p_physp_remote)
+				continue;
+			remote_sw = osm_physp_get_node_ptr(p_physp_remote)->sw;
+			if (!remote_sw)
+				continue;
+			remote_sw->num_of_mcm++;
+		}
+		/* map each switch once; its counters keep accumulating */
+		sw_guid = osm_node_get_node_guid(remote_sw->p_node);
+		if (cl_qmap_get(p_mcast_member_sw_tbl, sw_guid) ==
+		    cl_qmap_end(p_mcast_member_sw_tbl))
+			cl_qmap_insert(p_mcast_member_sw_tbl, sw_guid,
+				       &remote_sw->mgrp_item);
 	}
+	OSM_LOG_EXIT(sm->p_log);
+}
 
-	/*
-	   We should be here if there aren't any ports in the group.
-	 */
-	CL_ASSERT(num_ports);
+/* Reset the per-switch member counters and empty the switch map */
+static void mcast_mgr_destroy_switch_map(osm_sm_t * sm,
+			cl_qmap_t *p_mcast_member_sw_tbl)
+{
+	cl_map_item_t *p_item;
+	osm_switch_t *p_sw;
 
-	if (num_ports != 0)
-		avg_hops = (float)(hops / num_ports);
+	OSM_LOG_ENTER(sm->p_log);
 
+	p_item = cl_qmap_head(p_mcast_member_sw_tbl);
+	while (p_item != cl_qmap_end(p_mcast_member_sw_tbl)) {
+		p_sw = PARENT_STRUCT(p_item, osm_switch_t, mgrp_item);
+		p_sw->num_of_mcm = 0;
+		p_sw->is_mc_member = 0;
+		p_item = cl_qmap_next(p_item);
+	}
+	cl_qmap_remove_all(p_mcast_member_sw_tbl);
 	OSM_LOG_EXIT(sm->p_log);
-	return avg_hops;
 }
 
 /**********************************************************************
  Calculate the maximal "min hops" from the given switch to any
  of the group HCAs
  **********************************************************************/
-static float osm_mcast_mgr_compute_max_hops(osm_sm_t * sm, cl_qlist_t * l,
-					    const osm_switch_t * p_sw)
+#ifdef OSM_VENDOR_INTF_ANAFA
+/* ANAFA: score a root by the average hop count to the group members */
+static float osm_mcast_mgr_compute_avg_hops(osm_sm_t * sm, cl_qmap_t * m,
+					    const osm_switch_t * this_sw)
 {
-	uint32_t max_hops = 0;
+	float avg_hops = 0;
 	uint32_t hops = 0;
-	cl_list_item_t *i;
-	osm_mcast_work_obj_t *wobj;
+	uint32_t num_ports = 0, least_hops;
+	uint16_t lid;
+	cl_map_item_t *i;
+	osm_switch_t *sw;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	for (i = cl_qmap_head(m); i != cl_qmap_end(m); i = cl_qmap_next(i)) {
+		sw = cl_item_obj(i, sw, mgrp_item);
+		lid = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
+		least_hops = osm_switch_get_least_hops(this_sw, lid);
+		/* member end ports sit one hop beyond their switch */
+		hops += (least_hops + 1) * sw->num_of_mcm +
+		    least_hops * sw->is_mc_member;
+		num_ports += sw->num_of_mcm + sw->is_mc_member;
+	}
+
+	/* We should not be here if there aren't any ports in the group. */
+	CL_ASSERT(num_ports);
+	if (num_ports != 0)
+		avg_hops = (float)(hops / num_ports);
+
+	OSM_LOG_EXIT(sm->p_log);
+	return avg_hops;
+}
+#else
+static float osm_mcast_mgr_compute_max_hops(osm_sm_t * sm, cl_qmap_t * m,
+					    const osm_switch_t * this_sw)
+{
+	uint32_t max_hops = 0, hops;
+	uint16_t lid;
+	cl_map_item_t *i;
+	osm_switch_t *sw;
 
 	OSM_LOG_ENTER(sm->p_log);
 
@@ -208,9 +277,11 @@  static float osm_mcast_mgr_compute_max_hops(osm_sm_t * sm, cl_qlist_t * l,
 	   For each member of the multicast group, compute the
 	   number of hops to its base LID.
 	 */
-	for (i = cl_qlist_head(l); i != cl_qlist_end(l); i = cl_qlist_next(i)) {
-		wobj = cl_item_obj(i, wobj, list_item);
-		hops = osm_switch_get_port_least_hops(p_sw, wobj->p_port);
+	for (i = cl_qmap_head(m); i != cl_qmap_end(m); i = cl_qmap_next(i)) {
+		sw = cl_item_obj(i, sw, mgrp_item);
+		lid = cl_ntoh16(osm_node_get_base_lid(sw->p_node, 0));
+		hops = osm_switch_get_least_hops(this_sw, lid);
+		hops += sw->num_of_mcm ? 1 : 0;	/* switch -> end port hop */
 		if (hops > max_hops)
 			max_hops = hops;
 	}
@@ -222,6 +293,7 @@  static float osm_mcast_mgr_compute_max_hops(osm_sm_t * sm, cl_qlist_t * l,
 	OSM_LOG_EXIT(sm->p_log);
 	return (float)max_hops;
 }
+#endif
 
 /**********************************************************************
    This function attempts to locate the optimal switch for the
@@ -230,32 +302,30 @@  static float osm_mcast_mgr_compute_max_hops(osm_sm_t * sm, cl_qlist_t * l,
    of the multicast group.
 **********************************************************************/
 static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
-						   cl_qlist_t *list)
+						   cl_qlist_t * list)
 {
+	cl_qmap_t mgrp_sw_map;
 	cl_qmap_t *p_sw_tbl;
 	osm_switch_t *p_sw, *p_best_sw = NULL;
 	float hops = 0;
 	float best_hops = 10000;	/* any big # will do */
-#ifdef OSM_VENDOR_INTF_ANAFA
-	boolean_t use_avg_hops = TRUE;	/* anafa2 - bug hca on switch *//* use max hops for root */
-#else
-	boolean_t use_avg_hops = FALSE;	/* use max hops for root */
-#endif
 
 	OSM_LOG_ENTER(sm->p_log);
 
 	p_sw_tbl = &sm->p_subn->sw_guid_tbl;
 
+	mcast_mgr_build_switch_map(sm, list, &mgrp_sw_map);
 	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
 	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
 	     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
 		if (!osm_switch_supports_mcast(p_sw))
 			continue;
 
-		if (use_avg_hops)
-			hops = osm_mcast_mgr_compute_avg_hops(sm, list, p_sw);
-		else
-			hops = osm_mcast_mgr_compute_max_hops(sm, list, p_sw);
+#ifdef OSM_VENDOR_INTF_ANAFA
+		hops = osm_mcast_mgr_compute_avg_hops(sm, &mgrp_sw_map, p_sw);
+#else
+		hops = osm_mcast_mgr_compute_max_hops(sm, &mgrp_sw_map, p_sw);
+#endif
 
 		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
 			"Switch 0x%016" PRIx64 ", hops = %f\n",
@@ -276,6 +346,7 @@  static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
 		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
 			"No multicast capable switches detected\n");
 
+	mcast_mgr_destroy_switch_map(sm, &mgrp_sw_map);
 	OSM_LOG_EXIT(sm->p_log);
 	return p_best_sw;
 }