diff mbox

[10/12] opensm: Implement master spanning tree for torus-2QoS multicast support.

Message ID 1261169461-2516-10-git-send-email-jaschut@sandia.gov (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Jim Schutt Dec. 18, 2009, 8:50 p.m. UTC
None
diff mbox

Patch

diff --git a/opensm/opensm/osm_ucast_torus.c b/opensm/opensm/osm_ucast_torus.c
index 61e0bf3..082fcf5 100644
--- a/opensm/opensm/osm_ucast_torus.c
+++ b/opensm/opensm/osm_ucast_torus.c
@@ -154,6 +154,19 @@  struct link {
  * type.  Furthermore, if that type is PASSTHRU, then the connected links:
  *   1) are parallel to a given coordinate direction
  *   2) share the same two switches as endpoints.
+ *
+ * Torus-2QoS uses one master spanning tree for multicast, of which every
+ * multicast group spanning tree is a subtree.  to_stree_root is a pointer
+ * to the next port_grp on the path to the master spanning tree root.
+ * to_stree_tip is a pointer to the next port_grp on the path to a master
+ * spanning tree branch tip.
+ *
+ * Each t_switch can have at most one port_grp with a non-NULL to_stree_root.
+ * Exactly one t_switch in the fabric will have all port_grp objects with
+ * to_stree_root NULL; it is the master spanning tree root.
+ *
+ * A t_switch with all port_grp objects where to_stree_tip is NULL is at a
+ * master spanning tree branch tip.
  */
 struct port_grp {
 	enum endpt_type type;
@@ -163,6 +176,8 @@  struct port_grp {
 	unsigned sw_dlid_cnt;	/* switch dlids routed through this group */
 	unsigned ca_dlid_cnt;	/* CA dlids routed through this group */
 	struct t_switch *sw;	/* what switch we're attached to */
+	struct port_grp *to_stree_root;
+	struct port_grp *to_stree_tip;
 	struct endpoint **port;
 };
 
@@ -8499,6 +8514,256 @@  bool torus_lft(struct torus *t, struct t_switch *sw)
 	return success;
 }
 
+static
+bool good_xy_ring(struct torus *t, int x, int y, int z)
+{
+	struct t_switch ****sw = t->sw;
+	bool good_ring = true;
+
+	for (x = 0; x < t->x_sz && good_ring; x++)
+		good_ring = sw[x][y][z];
+
+	for (y = 0; y < t->y_sz && good_ring; y++)
+		good_ring = sw[x][y][z];
+
+	return good_ring;
+}
+
+static
+struct t_switch *find_plane_mid(struct torus *t, int z)
+{
+	int x, dx, xm = t->x_sz / 2;
+	int y, dy, ym = t->y_sz / 2;
+	struct t_switch ****sw = t->sw;
+
+	if (good_xy_ring(t, xm, ym, z))
+		return sw[xm][ym][z];
+
+	for (dx = 1, dy = 1; dx <= xm && dy <= ym; dx++, dy++) {
+
+		x = canonicalize(xm - dx, t->x_sz);
+		y = canonicalize(ym - dy, t->y_sz);
+		if (good_xy_ring(t, x, y, z))
+			return sw[x][y][z];
+
+		x = canonicalize(xm + dx, t->x_sz);
+		y = canonicalize(ym + dy, t->y_sz);
+		if (good_xy_ring(t, x, y, z))
+			return sw[x][y][z];
+	}
+	return NULL;
+}
+
+static
+struct t_switch *find_stree_root(struct torus *t)
+{
+	int x, y, z, dz, zm = t->z_sz / 2;
+	struct t_switch ****sw = t->sw;
+	struct t_switch *root;
+	bool good_plane;
+
+	/*
+	 * Look for a switch near the "center" (wrt. the datelines) of the
+	 * torus, as that will be the most optimum spanning tree root.  Use
+	 * a search that is not exhaustive, on the theory that this routing
+	 * engine isn't useful anyway if too many switches are missing.
+	 *
+	 * Also, want to pick an x-y plane with no missing switches, so that
+	 * the master spanning tree construction algorithm doesn't have to
+	 * deal with needing a turn on a missing switch.
+	 */
+	for (dz = 0; dz <= zm; dz++) {
+
+		z = canonicalize(zm - dz, t->z_sz);
+		good_plane = true;
+		for (y = 0; y < t->y_sz && good_plane; y++)
+			for (x = 0; x < t->x_sz && good_plane; x++)
+				good_plane = sw[x][y][z];
+
+		if (good_plane) {
+			root = find_plane_mid(t, z);
+			if (root)
+				goto out;
+		}
+		if (!dz)
+			continue;
+
+		z = canonicalize(zm + dz, t->z_sz);
+		good_plane = true;
+		for (y = 0; y < t->y_sz && good_plane; y++)
+			for (x = 0; x < t->x_sz && good_plane; x++)
+				good_plane = sw[x][y][z];
+
+		if (good_plane) {
+			root = find_plane_mid(t, z);
+			if (root)
+				goto out;
+		}
+	}
+	/*
+	 * Note that torus-2QoS can route a torus that is missing an entire
+	 * column (switches with x,y constant, for all z values) without
+	 * deadlocks.
+	 *
+	 * if we've reached this point, we must have a column of missing
+	 * switches, as routable_torus() would have returned false for
+	 * any other configuration of missing switches that made it through
+	 * the above.
+	 *
+	 * So any switch in the mid-z plane will do as the root.
+	 */
+	root = find_plane_mid(t, zm);
+out:
+	return root;
+}
+
+static
+bool sw_in_master_stree(struct t_switch *sw)
+{
+	int g;
+	bool connected;
+
+	connected = sw == sw->torus->master_stree_root;
+	for (g = 0; g < 2 * TORUS_MAX_DIM; g++)
+		connected = connected || sw->ptgrp[g].to_stree_root;
+
+	return connected;
+}
+
+static
+void grow_master_stree_branch(struct t_switch *root, struct t_switch *tip,
+			      unsigned to_root_pg, unsigned to_tip_pg)
+{
+	root->ptgrp[to_tip_pg].to_stree_tip = &tip->ptgrp[to_root_pg];
+	tip->ptgrp[to_root_pg].to_stree_root = &root->ptgrp[to_tip_pg];
+}
+
+static
+void build_master_stree_branch(struct t_switch *branch_root, int cdir)
+{
+	struct t_switch *sw, *n_sw, *p_sw;
+	unsigned l, idx, cnt, pg, ng;
+
+	switch (cdir) {
+	case 0:
+		idx = branch_root->i;
+		cnt = branch_root->torus->x_sz;
+		break;
+	case 1:
+		idx = branch_root->j;
+		cnt = branch_root->torus->y_sz;
+		break;
+	case 2:
+		idx = branch_root->k;
+		cnt = branch_root->torus->z_sz;
+		break;
+	default:
+		goto out;
+	}
+	/*
+	 * This algorithm intends that a spanning tree branch never crosses
+	 * a dateline unless the 1-D ring for which we're building the branch
+	 * is interrupted by failure.  We need that guarantee to prevent
+	 * multicast/unicast credit loops.
+	 */
+	n_sw = branch_root;		/* tip of negative cdir branch */
+	ng = 2 * cdir;			/* negative cdir port group index */
+	p_sw = branch_root;		/* tip of positive cdir branch */
+	pg = 2 * cdir + 1;		/* positive cdir port group index */
+
+	for (l = idx; n_sw && l >= 1; l--) {
+		sw = ring_next_sw(n_sw, cdir, -1);
+		if (sw && !sw_in_master_stree(sw)) {
+			grow_master_stree_branch(n_sw, sw, pg, ng);
+			n_sw = sw;
+		} else
+			n_sw = NULL;
+	}
+	for (l = idx; p_sw && l < (cnt - 1); l++) {
+		sw = ring_next_sw(p_sw, cdir, 1);
+		if (sw && !sw_in_master_stree(sw)) {
+			grow_master_stree_branch(p_sw, sw, ng, pg);
+			p_sw = sw;
+		} else
+			p_sw = NULL;
+	}
+	if (n_sw && p_sw)
+		goto out;
+	/*
+	 * At least one branch couldn't grow to the dateline for this ring.
+	 * That means it is acceptable to grow the branch by crossing the
+	 * dateline.
+	 */
+	for (l = 0; l < cnt; l++) {
+		if (n_sw) {
+			sw = ring_next_sw(n_sw, cdir, -1);
+			if (sw && !sw_in_master_stree(sw)) {
+				grow_master_stree_branch(n_sw, sw, pg, ng);
+				n_sw = sw;
+			} else
+				n_sw = NULL;
+		}
+		if (p_sw) {
+			sw = ring_next_sw(p_sw, cdir, 1);
+			if (sw && !sw_in_master_stree(sw)) {
+				grow_master_stree_branch(p_sw, sw, ng, pg);
+				p_sw = sw;
+			} else
+				p_sw = NULL;
+		}
+		if (!(n_sw || p_sw))
+			break;
+	}
+out:
+	return;
+}
+
+static
+bool torus_master_stree(struct torus *t)
+{
+	int i, j, k;
+	bool success = false;
+	struct t_switch *stree_root = find_stree_root(t);
+
+	if (stree_root)
+		build_master_stree_branch(stree_root, 0);
+	else
+		goto out;
+
+	k = stree_root->k;
+	for (i = 0; i < t->x_sz; i++) {
+		j = stree_root->j;
+		if (t->sw[i][j][k])
+			build_master_stree_branch(t->sw[i][j][k], 1);
+
+		for (j = 0; j < t->y_sz; j++)
+			if (t->sw[i][j][k])
+				build_master_stree_branch(t->sw[i][j][k], 2);
+	}
+	/*
+	 * At this point we should have a master spanning tree that contains
+	 * every present switch, for all fabrics that torus-2QoS can route
+	 * without deadlocks.  Make sure this is the case; otherwise warn
+	 * and return failure so we get bug reports.
+	 */
+	success = true;
+	for (i = 0; i < t->x_sz; i++)
+		for (j = 0; j < t->y_sz; j++)
+			for (k = 0; k < t->z_sz; k++) {
+				struct t_switch *sw = t->sw[i][j][k];
+				if (!sw || sw_in_master_stree(sw))
+					continue;
+
+				success = false;
+				OSM_LOG(&t->osm->log, OSM_LOG_ERROR,
+					"Error: sw 0x%04llx (%d,%d,%d) not in "
+					"torus multicast master spanning tree\n",
+					ntohllu(sw->n_id), i, j, k);
+			}
+out:
+	return success;
+}
+
 int route_torus(struct torus *t)
 {
 	int s;
@@ -8507,6 +8772,8 @@  int route_torus(struct torus *t)
 	for (s = 0; s < (int)t->switch_cnt; s++)
 		success = torus_lft(t, t->sw_pool[s]) && success;
 
+	success = success && torus_master_stree(t);
+
 	return success ? 0 : -1;
 }