@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
* Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved.
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
*
@@ -109,6 +109,9 @@ typedef struct osm_switch {
unsigned endport_links;
unsigned need_update;
void *priv;
+ cl_map_item_t mcast_item;
+ uint32_t num_of_mcm;
+ uint8_t is_mc_member;
} osm_switch_t;
/*
* FIELDS
@@ -151,6 +154,15 @@ typedef struct osm_switch {
* When set indicates that switch was probably reset, so
* fwd tables and rest cached data should be flushed
*
+*	mcast_item
+*		Map item linking this switch into the member-switch map used while building a mcast tree
+*
+*	num_of_mcm
+*		Number of mcast member CA ports attached to this switch
+*
+*	is_mc_member
+*		Non-zero when the switch itself is a mcast member
+*
* SEE ALSO
* Switch object
*********/
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
* Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved.
* Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
* Copyright (c) 2008 Xsigo Systems Inc. All rights reserved.
@@ -203,25 +203,132 @@ static float osm_mcast_mgr_compute_max_hops(osm_sm_t * sm,
return (float)max_hops;
}
+/* Collect into p_mcast_member_sw_tbl (keyed by node GUID) each switch that is
+ * a member of p_mgrp or has a member CA attached; per switch, count attached
+ * member CAs in num_of_mcm and flag a member switch in is_mc_member. */
+static void mcast_mgr_build_switch_map(osm_sm_t * sm,
+				       const osm_mgrp_t * p_mgrp,
+				       cl_qmap_t *p_mcast_member_sw_tbl)
+{
+	osm_switch_t *remote_sw;
+	const osm_mcm_port_t *p_mcm_port;
+	const cl_qmap_t *p_mcm_tbl;
+	osm_port_t *p_port;
+	ib_net64_t node_guid;
+	osm_physp_t *p_physp_remote;
+	osm_node_t *remote_node;
+
+	OSM_LOG_ENTER(sm->p_log);
+
+	cl_qmap_init(p_mcast_member_sw_tbl);
+	p_mcm_tbl = &p_mgrp->mcm_port_tbl;
+	for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl);
+	     p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl);
+	     p_mcm_port = (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) {
+		p_port = (osm_port_t *) osm_get_port_by_guid(sm->p_subn,
+			ib_gid_get_guid(&p_mcm_port->port_gid));
+		if (!p_port)
+			continue;
+		if (osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_CA) {
+			/* a CA is accounted on the node at the far end of its link */
+			p_physp_remote = osm_physp_get_remote(p_port->p_physp);
+			if (!p_physp_remote)
+				continue;	/* no remote port: nothing to account */
+			remote_node = osm_physp_get_node_ptr(p_physp_remote);
+		} else	/* for switches the "remote" switch is the switch itself */
+			remote_node = osm_physp_get_node_ptr(p_port->p_physp);
+		remote_sw = remote_node->sw;
+		if (!remote_sw)
+			continue;	/* node carries no switch object to insert */
+		node_guid = osm_node_get_node_guid(remote_node);
+		if (cl_qmap_get(p_mcast_member_sw_tbl, node_guid) ==
+		    cl_qmap_end(p_mcast_member_sw_tbl))
+			cl_qmap_insert(p_mcast_member_sw_tbl, node_guid, &remote_sw->mcast_item);
+		/* account every member, not only the first one seen per switch */
+		if (osm_node_get_type(p_port->p_node) == IB_NODE_TYPE_CA)
+			remote_sw->num_of_mcm++;
+		else
+			remote_sw->is_mc_member = 1;
+	}
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/* Zero the mcast counters of every switch in the table, then empty the table. */
+static void mcast_mgr_destroy_switch_map(osm_sm_t * sm,
+					 cl_qmap_t *p_mcast_member_sw_tbl)
+{
+	cl_map_item_t *p_cur;
+	osm_switch_t *p_member_sw;
+
+	OSM_LOG_ENTER(sm->p_log);
+	for (p_cur = cl_qmap_head(p_mcast_member_sw_tbl);
+	     p_cur != cl_qmap_end(p_mcast_member_sw_tbl);
+	     p_cur = cl_qmap_next(p_cur)) {
+		p_member_sw = PARENT_STRUCT(p_cur, osm_switch_t, mcast_item);
+		p_member_sw->is_mc_member = 0;
+		p_member_sw->num_of_mcm = 0;
+	}
+	cl_qmap_remove_all(p_mcast_member_sw_tbl);
+	OSM_LOG_EXIT(sm->p_log);
+}
+
+/* Return the average, over all mcast members recorded in p_sw_tbl, of the
+ * hop count from p_sw_cur: least hops to the member's switch, plus one for
+ * each attached CA member. Returns 0 when the table records no members. */
+static float
+osm_mcast_mgr_compute_avg_hops_weight(osm_sm_t * sm,
+				      const osm_switch_t *const p_sw_cur,
+				      const cl_qmap_t *p_sw_tbl )
+{
+	float avg_hops_weight = 0;
+	uint32_t hops = 0;
+	uint32_t num_ports = 0;
+	uint16_t base_lid_ho;
+	uint32_t least_hops;
+	osm_switch_t *p_sw;
+	cl_map_item_t *p_item;
+
+	OSM_LOG_ENTER(sm->p_log);
+	for (p_item = cl_qmap_head(p_sw_tbl);
+	     p_item != cl_qmap_end(p_sw_tbl);
+	     p_item = cl_qmap_next(p_item)) {
+		p_sw = PARENT_STRUCT(p_item, osm_switch_t, mcast_item);
+		base_lid_ho = cl_ntoh16(osm_node_get_base_lid(p_sw->p_node, 0));
+		least_hops = osm_switch_get_least_hops(p_sw_cur, base_lid_ho);
+		/*
+		   Each member CA attached to this switch adds (least_hops + 1);
+		   the switch itself, when it is a member, adds least_hops only.
+		 */
+		hops += (least_hops + 1) * p_sw->num_of_mcm +
+			least_hops * p_sw->is_mc_member;
+		num_ports += p_sw->num_of_mcm + p_sw->is_mc_member;
+	}
+
+	CL_ASSERT(num_ports);
+	/* divide in floating point: dividing the two uint32_t values first
+	   would truncate the average to a whole number of hops */
+	if (num_ports != 0)
+		avg_hops_weight = (float)hops / (float)num_ports;
+
+	OSM_LOG_EXIT(sm->p_log);
+	return avg_hops_weight;
+}
+
/**********************************************************************
This function attempts to locate the optimal switch for the
center of the spanning tree. The current algorithm chooses
- a switch with the lowest average hop count to the members
+   a switch with the lowest average hop-count weight to the members
of the multicast group.
**********************************************************************/
static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
- const osm_mgrp_t * p_mgrp)
+ const osm_mgrp_t * p_mgrp)
{
cl_qmap_t *p_sw_tbl;
- const osm_switch_t *p_sw;
+ osm_switch_t *p_sw;
const osm_switch_t *p_best_sw = NULL;
float hops = 0;
float best_hops = 10000; /* any big # will do */
-#ifdef OSM_VENDOR_INTF_ANAFA
- boolean_t use_avg_hops = TRUE; /* anafa2 - bug hca on switch *//* use max hops for root */
-#else
- boolean_t use_avg_hops = FALSE; /* use max hops for root */
-#endif
+ cl_qmap_t mcast_member_sw_tbl;
OSM_LOG_ENTER(sm->p_log);
@@ -229,16 +336,14 @@ static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
CL_ASSERT(!osm_mgrp_is_empty(p_mgrp));
+ mcast_mgr_build_switch_map(sm, p_mgrp, &mcast_member_sw_tbl);
for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) {
if (!osm_switch_supports_mcast(p_sw))
continue;
- if (use_avg_hops)
- hops = osm_mcast_mgr_compute_avg_hops(sm, p_mgrp, p_sw);
- else
- hops = osm_mcast_mgr_compute_max_hops(sm, p_mgrp, p_sw);
+ hops = osm_mcast_mgr_compute_avg_hops_weight(sm, p_sw, &mcast_member_sw_tbl);
OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
"Switch 0x%016" PRIx64 ", hops = %f\n",
@@ -259,6 +364,7 @@ static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
"No multicast capable switches detected\n");
+ mcast_mgr_destroy_switch_map(sm, &mcast_member_sw_tbl);
OSM_LOG_EXIT(sm->p_log);
return (osm_switch_t *) p_best_sw;
}