@@ -154,6 +154,19 @@ struct link {
* type. Furthermore, if that type is PASSTHRU, then the connected links:
* 1) are parallel to a given coordinate direction
* 2) share the same two switches as endpoints.
+ *
+ * Torus-2QoS uses one master spanning tree for multicast, of which every
+ * multicast group spanning tree is a subtree. to_stree_root is a pointer
+ * to the next port_grp on the path to the master spanning tree root.
+ * to_stree_tip is a pointer to the next port_grp on the path to a master
+ * spanning tree branch tip.
+ *
+ * Each t_switch can have at most one port_grp with a non-NULL to_stree_root.
+ * Exactly one t_switch in the fabric will have all port_grp objects with
+ * to_stree_root NULL; it is the master spanning tree root.
+ *
+ * A t_switch with all port_grp objects where to_stree_tip is NULL is at a
+ * master spanning tree branch tip.
*/
struct port_grp {
enum endpt_type type;
@@ -163,6 +176,8 @@ struct port_grp {
unsigned sw_dlid_cnt; /* switch dlids routed through this group */
unsigned ca_dlid_cnt; /* CA dlids routed through this group */
struct t_switch *sw; /* what switch we're attached to */
+ struct port_grp *to_stree_root;
+ struct port_grp *to_stree_tip;
struct endpoint **port;
};
@@ -8499,6 +8514,256 @@ bool torus_lft(struct torus *t, struct t_switch *sw)
return success;
}
+static
+bool good_xy_ring(struct torus *t, int x, int y, int z)
+{
+ struct t_switch ****sw = t->sw;
+ bool good_ring = true;
+
+ for (x = 0; x < t->x_sz && good_ring; x++)
+ good_ring = sw[x][y][z];
+
+ for (y = 0; y < t->y_sz && good_ring; y++)
+ good_ring = sw[x][y][z];
+
+ return good_ring;
+}
+
+static
+struct t_switch *find_plane_mid(struct torus *t, int z)
+{
+ int x, dx, xm = t->x_sz / 2;
+ int y, dy, ym = t->y_sz / 2;
+ struct t_switch ****sw = t->sw;
+
+ if (good_xy_ring(t, xm, ym, z))
+ return sw[xm][ym][z];
+
+ for (dx = 1, dy = 1; dx <= xm && dy <= ym; dx++, dy++) {
+
+ x = canonicalize(xm - dx, t->x_sz);
+ y = canonicalize(ym - dy, t->y_sz);
+ if (good_xy_ring(t, x, y, z))
+ return sw[x][y][z];
+
+ x = canonicalize(xm + dx, t->x_sz);
+ y = canonicalize(ym + dy, t->y_sz);
+ if (good_xy_ring(t, x, y, z))
+ return sw[x][y][z];
+ }
+ return NULL;
+}
+
+static
+struct t_switch *find_stree_root(struct torus *t)
+{
+ int x, y, z, dz, zm = t->z_sz / 2;
+ struct t_switch ****sw = t->sw;
+ struct t_switch *root;
+ bool good_plane;
+
+ /*
+ * Look for a switch near the "center" (wrt. the datelines) of the
+ * torus, as that will be the most optimum spanning tree root. Use
+ * a search that is not exhaustive, on the theory that this routing
+ * engine isn't useful anyway if too many switches are missing.
+ *
+ * Also, want to pick an x-y plane with no missing switches, so that
+ * the master spanning tree construction algorithm doesn't have to
+ * deal with needing a turn on a missing switch.
+ */
+ for (dz = 0; dz <= zm; dz++) {
+
+ z = canonicalize(zm - dz, t->z_sz);
+ good_plane = true;
+ for (y = 0; y < t->y_sz && good_plane; y++)
+ for (x = 0; x < t->x_sz && good_plane; x++)
+ good_plane = sw[x][y][z];
+
+ if (good_plane) {
+ root = find_plane_mid(t, z);
+ if (root)
+ goto out;
+ }
+ if (!dz)
+ continue;
+
+ z = canonicalize(zm + dz, t->z_sz);
+ good_plane = true;
+ for (y = 0; y < t->y_sz && good_plane; y++)
+ for (x = 0; x < t->x_sz && good_plane; x++)
+ good_plane = sw[x][y][z];
+
+ if (good_plane) {
+ root = find_plane_mid(t, z);
+ if (root)
+ goto out;
+ }
+ }
+ /*
+ * Note that torus-2QoS can route a torus that is missing an entire
+ * column (switches with x,y constant, for all z values) without
+ * deadlocks.
+ *
+ * if we've reached this point, we must have a column of missing
+ * switches, as routable_torus() would have returned false for
+ * any other configuration of missing switches that made it through
+ * the above.
+ *
+ * So any switch in the mid-z plane will do as the root.
+ */
+ root = find_plane_mid(t, zm);
+out:
+ return root;
+}
+
+static
+bool sw_in_master_stree(struct t_switch *sw)
+{
+ int g;
+ bool connected;
+
+ connected = sw == sw->torus->master_stree_root;
+ for (g = 0; g < 2 * TORUS_MAX_DIM; g++)
+ connected = connected || sw->ptgrp[g].to_stree_root;
+
+ return connected;
+}
+
+static
+void grow_master_stree_branch(struct t_switch *root, struct t_switch *tip,
+ unsigned to_root_pg, unsigned to_tip_pg)
+{
+ root->ptgrp[to_tip_pg].to_stree_tip = &tip->ptgrp[to_root_pg];
+ tip->ptgrp[to_root_pg].to_stree_root = &root->ptgrp[to_tip_pg];
+}
+
+static
+void build_master_stree_branch(struct t_switch *branch_root, int cdir)
+{
+ struct t_switch *sw, *n_sw, *p_sw;
+ unsigned l, idx, cnt, pg, ng;
+
+ switch (cdir) {
+ case 0:
+ idx = branch_root->i;
+ cnt = branch_root->torus->x_sz;
+ break;
+ case 1:
+ idx = branch_root->j;
+ cnt = branch_root->torus->y_sz;
+ break;
+ case 2:
+ idx = branch_root->k;
+ cnt = branch_root->torus->z_sz;
+ break;
+ default:
+ goto out;
+ }
+ /*
+ * This algorithm intends that a spanning tree branch never crosses
+ * a dateline unless the 1-D ring for which we're building the branch
+ * is interrupted by failure. We need that guarantee to prevent
+ * multicast/unicast credit loops.
+ */
+ n_sw = branch_root; /* tip of negative cdir branch */
+ ng = 2 * cdir; /* negative cdir port group index */
+ p_sw = branch_root; /* tip of positive cdir branch */
+ pg = 2 * cdir + 1; /* positive cdir port group index */
+
+ for (l = idx; n_sw && l >= 1; l--) {
+ sw = ring_next_sw(n_sw, cdir, -1);
+ if (sw && !sw_in_master_stree(sw)) {
+ grow_master_stree_branch(n_sw, sw, pg, ng);
+ n_sw = sw;
+ } else
+ n_sw = NULL;
+ }
+ for (l = idx; p_sw && l < (cnt - 1); l++) {
+ sw = ring_next_sw(p_sw, cdir, 1);
+ if (sw && !sw_in_master_stree(sw)) {
+ grow_master_stree_branch(p_sw, sw, ng, pg);
+ p_sw = sw;
+ } else
+ p_sw = NULL;
+ }
+ if (n_sw && p_sw)
+ goto out;
+ /*
+ * At least one branch couldn't grow to the dateline for this ring.
+ * That means it is acceptable to grow the branch by crossing the
+ * dateline.
+ */
+ for (l = 0; l < cnt; l++) {
+ if (n_sw) {
+ sw = ring_next_sw(n_sw, cdir, -1);
+ if (sw && !sw_in_master_stree(sw)) {
+ grow_master_stree_branch(n_sw, sw, pg, ng);
+ n_sw = sw;
+ } else
+ n_sw = NULL;
+ }
+ if (p_sw) {
+ sw = ring_next_sw(p_sw, cdir, 1);
+ if (sw && !sw_in_master_stree(sw)) {
+ grow_master_stree_branch(p_sw, sw, ng, pg);
+ p_sw = sw;
+ } else
+ p_sw = NULL;
+ }
+ if (!(n_sw || p_sw))
+ break;
+ }
+out:
+ return;
+}
+
+static
+bool torus_master_stree(struct torus *t)
+{
+ int i, j, k;
+ bool success = false;
+ struct t_switch *stree_root = find_stree_root(t);
+
+ if (stree_root)
+ build_master_stree_branch(stree_root, 0);
+ else
+ goto out;
+
+ k = stree_root->k;
+ for (i = 0; i < t->x_sz; i++) {
+ j = stree_root->j;
+ if (t->sw[i][j][k])
+ build_master_stree_branch(t->sw[i][j][k], 1);
+
+ for (j = 0; j < t->y_sz; j++)
+ if (t->sw[i][j][k])
+ build_master_stree_branch(t->sw[i][j][k], 2);
+ }
+ /*
+ * At this point we should have a master spanning tree that contains
+ * every present switch, for all fabrics that torus-2QoS can route
+ * without deadlocks. Make sure this is the case; otherwise warn
+ * and return failure so we get bug reports.
+ */
+ success = true;
+ for (i = 0; i < t->x_sz; i++)
+ for (j = 0; j < t->y_sz; j++)
+ for (k = 0; k < t->z_sz; k++) {
+ struct t_switch *sw = t->sw[i][j][k];
+ if (!sw || sw_in_master_stree(sw))
+ continue;
+
+ success = false;
+ OSM_LOG(&t->osm->log, OSM_LOG_ERROR,
+ "Error: sw 0x%04llx (%d,%d,%d) not in "
+ "torus multicast master spanning tree\n",
+ ntohllu(sw->n_id), i, j, k);
+ }
+out:
+ return success;
+}
+
int route_torus(struct torus *t)
{
int s;
@@ -8507,6 +8772,8 @@ int route_torus(struct torus *t)
for (s = 0; s < (int)t->switch_cnt; s++)
success = torus_lft(t, t->sw_pool[s]) && success;
+ success = success && torus_master_stree(t);
+
return success ? 0 : -1;
}