From patchwork Mon Feb 4 13:20:05 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alex Netes X-Patchwork-Id: 2091971 X-Patchwork-Delegate: hal@mellanox.com Return-Path: X-Original-To: patchwork-linux-rdma@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork1.kernel.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by patchwork1.kernel.org (Postfix) with ESMTP id 908893FD56 for ; Mon, 4 Feb 2013 13:20:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755340Ab3BDNUe (ORCPT ); Mon, 4 Feb 2013 08:20:34 -0500 Received: from mail-ve0-f181.google.com ([209.85.128.181]:35113 "EHLO mail-ve0-f181.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755236Ab3BDNUb (ORCPT ); Mon, 4 Feb 2013 08:20:31 -0500 Received: by mail-ve0-f181.google.com with SMTP id d10so4601748vea.26 for ; Mon, 04 Feb 2013 05:20:30 -0800 (PST) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20120113; h=x-received:sender:from:to:cc:subject:date:message-id:x-mailer :in-reply-to:references:x-gm-message-state; bh=6klBReM6pL5A7gse8s2gjXemZHHrmSJB778mdrOvvMo=; b=BITuePjgSNeVJZatD8hDFpBxbSLB6tcm0/2+HCdpPwP+QhuMSRRHP/oEk6H1xxkD7X iwiYRb0/xsOOZm3N6hbb7rdFhVo9pv+5iW0n0TGwCsJeOQMFUESJh1qPRVrSw+4Y1EPB DjOLu91tQX1cbLQTr36MKHPM4iIK8a05zQ7LzqrG8MuZDmWc93Tiebnw5/CnAiBQ14dx IyLQdkCEKfFIsqsq2yOTx/L/i4pM4FgQVJybzYuF9LnUcooYXDwUV54HoQXqFdqYEwA7 B1SbhifZXc0GdOZ5n4R5Idcu0sDOIAuqjsyWsgZU1SEZy0wh+dIRlnnUXTdlrqy6Zj6L sWLg== X-Received: by 10.52.90.100 with SMTP id bv4mr19419994vdb.48.1359984030175; Mon, 04 Feb 2013 05:20:30 -0800 (PST) Received: from localhost (out.voltaire.com. 
[193.47.165.251]) by mx.google.com with ESMTPS id u5sm21447664vef.0.2013.02.04.05.20.28 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Mon, 04 Feb 2013 05:20:29 -0800 (PST) From: Alex Netes To: linux-rdma@vger.kernel.org Cc: Shlomi Nimrodi , Alex Netes Subject: [PATCH] opensm/osm_ucast_ftree.c: Fix unranked nodes bug in FTree Date: Mon, 4 Feb 2013 15:20:05 +0200 Message-Id: <1359984011-30753-4-git-send-email-alexne@mellanox.com> X-Mailer: git-send-email 1.7.11.7 In-Reply-To: <1359984011-30753-1-git-send-email-alexne@mellanox.com> References: <1359984011-30753-1-git-send-email-alexne@mellanox.com> X-Gm-Message-State: ALoCoQmmaKHwz8ZaS+abWiWREKuUAGZv0QgEIus0fDWEOSnJchtKEDqmjzAN/ILj4Zc+oCiPzAnE Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Shlomi Nimrodi When some nodes were left unranked (for example, nodes all of whose ports are unhealthy), the SM crashed. Therefore, we need to remove from the ftree structure the unranked switches and the HCAs that are connected only to unranked switches. 
Signed-off-by: Shlomi Nimrodi Signed-off-by: Alex Netes --- opensm/osm_ucast_ftree.c | 217 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 176 insertions(+), 41 deletions(-) diff --git a/opensm/osm_ucast_ftree.c b/opensm/osm_ucast_ftree.c index d58fff1..8363bd2 100644 --- a/opensm/osm_ucast_ftree.c +++ b/opensm/osm_ucast_ftree.c @@ -556,18 +556,21 @@ static ftree_sw_t *sw_create(IN ftree_fabric_t * p_ftree, sizeof(ftree_port_group_t *)); if (p_sw->down_port_groups == NULL) goto FREE_P_SW; + memset(p_sw->down_port_groups, 0, ports_num * sizeof(ftree_port_group_t *)); p_sw->up_port_groups = (ftree_port_group_t **) malloc(ports_num * sizeof(ftree_port_group_t *)); if (p_sw->up_port_groups == NULL) goto FREE_DOWN; + memset(p_sw->up_port_groups, 0, ports_num * sizeof(ftree_port_group_t *)); p_sw->sibling_port_groups = (ftree_port_group_t **) malloc(ports_num * sizeof(ftree_port_group_t *)); if (p_sw->sibling_port_groups == NULL) goto FREE_UP; + memset(p_sw->sibling_port_groups, 0, ports_num * sizeof(ftree_port_group_t *)); /* initialize lft buffer */ memset(p_osm_sw->new_lft, OSM_NO_PATH, p_osm_sw->lft_size); @@ -807,6 +810,8 @@ static ftree_hca_t *hca_create(IN osm_node_t * p_osm_node) free(p_hca); return NULL; } + memset(p_hca->up_port_groups, 0, osm_node_get_num_physp(p_hca->p_osm_node) * + sizeof(ftree_port_group_t *)); p_hca->up_port_groups_num = 0; return p_hca; } @@ -1757,11 +1762,11 @@ static boolean_t fabric_validate_topology(IN ftree_fabric_t * p_ftree) p_sw = p_next_sw; p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); - if (!reference_sw_arr[p_sw->rank]) { + if (!reference_sw_arr[p_sw->rank]) /* This is the first switch in the current level that we're checking - use it as a reference */ reference_sw_arr[p_sw->rank] = p_sw; - } else { + else { /* compare this switch properties to the reference switch */ if (reference_sw_arr[p_sw->rank]->up_port_groups_num != @@ -3254,7 +3259,8 @@ static void sw_reverse_rank(IN cl_map_item_t * const 
p_map_item, { ftree_fabric_t *p_ftree = (ftree_fabric_t *) context; ftree_sw_t *p_sw = (ftree_sw_t * const)p_map_item; - p_sw->rank = p_ftree->max_switch_rank - p_sw->rank; + if (p_sw->rank != 0xFFFFFFFF) + p_sw->rank = p_ftree->max_switch_rank - p_sw->rank; } /*************************************************** @@ -3517,7 +3523,8 @@ struct rank_root_cxt { ftree_fabric_t *fabric; cl_list_t *list; }; - +/*************************************************** + ***************************************************/ static int rank_root_sw_by_guid(void *cxt, uint64_t guid, char *p) { struct rank_root_cxt *c = cxt; @@ -3538,53 +3545,63 @@ static int rank_root_sw_by_guid(void *cxt, uint64_t guid, char *p) return 0; } - -static int fabric_rank_from_roots(IN ftree_fabric_t * p_ftree) +/*************************************************** + ***************************************************/ +static boolean_t fabric_load_roots(IN ftree_fabric_t * p_ftree, + IN cl_list_t* p_ranking_bfs_list) { struct rank_root_cxt context; + unsigned num_roots; + + if (p_ranking_bfs_list) { + cl_list_init(p_ranking_bfs_list, 10); + + /* Rank all the roots and add them to list */ + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Fetching root nodes from file %s\n", + p_ftree->p_osm->subn.opt.root_guid_file); + + context.fabric = p_ftree; + context.list = p_ranking_bfs_list; + if (parse_node_map(p_ftree->p_osm->subn.opt.root_guid_file, + rank_root_sw_by_guid, &context)) { + return FALSE; + } + + num_roots = cl_list_count(p_ranking_bfs_list); + if (!num_roots) { + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB25: " + "No valid roots supplied\n"); + return FALSE; + } + + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Ranked %u valid root switches\n", num_roots); + } + return TRUE; +} +/*************************************************** + ***************************************************/ +static int fabric_rank_from_roots(IN ftree_fabric_t * p_ftree, + IN cl_list_t* p_ranking_bfs_list) 
+{ osm_node_t *p_osm_node; osm_node_t *p_remote_osm_node; osm_physp_t *p_osm_physp; ftree_sw_t *p_sw; ftree_sw_t *p_remote_sw; - cl_list_t ranking_bfs_list; int res = 0; - unsigned num_roots; unsigned max_rank = 0; unsigned i; OSM_LOG_ENTER(&p_ftree->p_osm->log); - cl_list_init(&ranking_bfs_list, 10); - - /* Rank all the roots and add them to list */ - OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, - "Fetching root nodes from file %s\n", - p_ftree->p_osm->subn.opt.root_guid_file); - - context.fabric = p_ftree; - context.list = &ranking_bfs_list; - if (parse_node_map(p_ftree->p_osm->subn.opt.root_guid_file, - rank_root_sw_by_guid, &context)) { - res = -1; - goto Exit; - } - num_roots = cl_list_count(&ranking_bfs_list); - if (!num_roots) { - OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_ERROR, "ERR AB25: " - "No valid roots supplied\n"); + if (!p_ranking_bfs_list) { res = -1; goto Exit; } - - OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, - "Ranked %u valid root switches\n", num_roots); - - /* Now the list has all the roots. - BFS the subnet and update rank on all the switches. 
*/ - - while (!cl_is_list_empty(&ranking_bfs_list)) { - p_sw = (ftree_sw_t *) cl_list_remove_head(&ranking_bfs_list); + while (!cl_is_list_empty(p_ranking_bfs_list)) { + p_sw = (ftree_sw_t *) cl_list_remove_head(p_ranking_bfs_list); p_osm_node = p_sw->p_osm_sw->p_node; /* note: skipping port 0 on switches */ @@ -3615,7 +3632,7 @@ static int fabric_rank_from_roots(IN ftree_fabric_t * p_ftree) sw_get_guid_ho(p_remote_sw), p_remote_sw->rank); max_rank = p_remote_sw->rank; - cl_list_insert_tail(&ranking_bfs_list, + cl_list_insert_tail(p_ranking_bfs_list, p_remote_sw); } } @@ -3629,7 +3646,6 @@ static int fabric_rank_from_roots(IN ftree_fabric_t * p_ftree) p_ftree->max_switch_rank = max_rank; Exit: - cl_list_destroy(&ranking_bfs_list); OSM_LOG_EXIT(&p_ftree->p_osm->log); return res; } /* fabric_rank_from_roots() */ @@ -3678,18 +3694,50 @@ Exit: } /* fabric_rank_from_hcas() */ /*************************************************** + * After ranking from HCA's we want to re-rank using + * the roots ***************************************************/ +static int fabric_rerank_using_root(IN ftree_fabric_t * p_ftree, + IN cl_list_t* p_ranking_bfs_list) +{ + ftree_sw_t *p_sw = NULL; + ftree_sw_t *p_next_sw; + int res; + + OSM_LOG_ENTER(&p_ftree->p_osm->log); + cl_list_init(p_ranking_bfs_list, 10); + p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + p_sw = p_next_sw; + p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); + if (p_sw->rank == 0) + cl_list_insert_tail(p_ranking_bfs_list, p_sw); + else + p_sw->rank = 0xFFFFFFFF; + } + res = fabric_rank_from_roots(p_ftree, p_ranking_bfs_list); + OSM_LOG_EXIT(&p_ftree->p_osm->log); + return res; +} +/*************************************************** + ***************************************************/ static int fabric_rank(IN ftree_fabric_t * p_ftree) { - int res = 0; + int res = -1; + cl_list_t ranking_bfs_list; 
OSM_LOG_ENTER(&p_ftree->p_osm->log); - if (fabric_roots_provided(p_ftree)) - res = fabric_rank_from_roots(p_ftree); - else + if (fabric_roots_provided(p_ftree)){ + if (fabric_load_roots(p_ftree, &ranking_bfs_list)) + res = fabric_rank_from_roots(p_ftree, &ranking_bfs_list); + } + else { res = fabric_rank_from_hcas(p_ftree); + if (!res) + res = fabric_rerank_using_root(p_ftree, &ranking_bfs_list); + } if (res) goto Exit; @@ -3698,6 +3746,7 @@ static int fabric_rank(IN ftree_fabric_t * p_ftree) "FatTree max switch rank is %u\n", p_ftree->max_switch_rank); Exit: + cl_list_destroy(&ranking_bfs_list); OSM_LOG_EXIT(&p_ftree->p_osm->log); return res; } /* fabric_rank() */ @@ -3869,7 +3918,92 @@ Exit: /*************************************************** ***************************************************/ +/* Get HCA and switch node, check if this node is the + * Only switch that this HCA is connected to */ +static boolean_t has_one_remote_switch(IN ftree_fabric_t *p_ftree, + IN ftree_hca_t *p_hca, + IN osm_node_t* p_node) +{ + boolean_t found_other_sw = FALSE; + osm_physp_t *p_physp, *p_remote_physp; + int i = 1; + int ports_num; + + ports_num = osm_node_get_num_physp(p_hca->p_osm_node); + while (!found_other_sw && (i < ports_num)) { + p_physp = osm_node_get_physp_ptr(p_hca->p_osm_node, i); + if (p_physp){ + p_remote_physp = p_physp->p_remote_physp; + if (p_remote_physp && (p_remote_physp->p_node!=p_node)) + /* Found connection to sw that is not p_node */ + found_other_sw = TRUE; + } + i++; + } + + return (!found_other_sw); +} + +/*************************************************** + ***************************************************/ +/* Get a Sw and remove all depended HCA's, meaning all + * HCA's which this is the only switch they are connected + * to */ +static int remove_depended_hca(IN ftree_fabric_t *p_ftree, IN ftree_sw_t *p_sw) +{ + ftree_hca_t *p_hca; + int counter = 0; + int port_num; + osm_physp_t* physp; + osm_node_t* sw_node; + uint64_t remote_hca_guid; + + 
sw_node = p_sw->p_osm_sw->p_node; + for (port_num = 0; port_num < sw_node->physp_tbl_size; port_num++) { + physp = osm_node_get_physp_ptr(sw_node, port_num); + if (physp && physp->p_remote_physp) { + if (osm_node_get_type(physp->p_remote_physp->p_node) == IB_NODE_TYPE_CA) { + remote_hca_guid = + osm_node_get_node_guid(physp->p_remote_physp->p_node); + p_hca = fabric_get_hca_by_guid(p_ftree, remote_hca_guid); + if (p_hca && has_one_remote_switch(p_ftree, p_hca, sw_node)) { + cl_qmap_remove_item(&p_ftree->hca_tbl, &p_hca->map_item); + hca_destroy(p_hca); + counter++; + } + } + } + } + return counter; +} +/*************************************************** + ***************************************************/ +static void fabric_remove_unranked_sw(IN ftree_fabric_t *p_ftree) +{ + ftree_sw_t *p_sw = NULL; + ftree_sw_t *p_next_sw; + int removed_hca; + int count = 0; + p_next_sw = (ftree_sw_t *) cl_qmap_head(&p_ftree->sw_tbl); + while (p_next_sw != (ftree_sw_t *) cl_qmap_end(&p_ftree->sw_tbl)) { + p_sw = p_next_sw; + p_next_sw = (ftree_sw_t *) cl_qmap_next(&p_sw->map_item); + if (!sw_ranked(p_sw)) { + cl_qmap_remove_item(&p_ftree->sw_tbl,&p_sw->map_item); + removed_hca = remove_depended_hca(p_ftree, p_sw); + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, + "Removing Unranked sw 0x%" PRIx64 " (with %d dependent hca's)\n", + sw_get_guid_ho(p_sw),removed_hca); + sw_destroy(p_ftree, p_sw); + count++; + } + } + OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_DEBUG, + "Removed %d invalid switches\n", count); +} +/*************************************************** + ***************************************************/ static int construct_fabric(IN void *context) { ftree_fabric_t *p_ftree = context; @@ -3952,6 +4086,7 @@ static int construct_fabric(IN void *context) status = -1; goto Exit; } + fabric_remove_unranked_sw(p_ftree); /* For each hca and switch, construct array of ports. This is done after the whole FatTree data structure is ready,