From patchwork Tue Nov 3 02:00:28 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Sasha Khapyorsky X-Patchwork-Id: 57182 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id nA31wNeh012036 for ; Tue, 3 Nov 2009 01:58:23 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755830AbZKCB6Q (ORCPT ); Mon, 2 Nov 2009 20:58:16 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1756784AbZKCB6Q (ORCPT ); Mon, 2 Nov 2009 20:58:16 -0500 Received: from mail-ew0-f228.google.com ([209.85.219.228]:43817 "EHLO mail-ew0-f228.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755830AbZKCB6P (ORCPT ); Mon, 2 Nov 2009 20:58:15 -0500 Received: by ewy28 with SMTP id 28so5530625ewy.18 for ; Mon, 02 Nov 2009 17:58:19 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:received:received:sender:received:date:from:to :cc:subject:message-id:references:mime-version:content-type :content-disposition:in-reply-to:user-agent; bh=+qmTuVaKXOItsGhtoCFivuizdoTV/MyLb28IcTxVaPM=; b=q07YyKhLK8Y/cvpcW6XYguJ6J6MeQhESgVFdDeSj2ZiCsnQ25te4ZskyfWinPhPM0A y7QqMsNzQkOGRaOMWKjnURFjLhyeEaXkoniFELWn+4kMfkJI4jbaJwg/RStCOOwpeZnp fiS00ESz6nPGXzqfiaUmQoACk31XhzSCIfX9I= DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=sender:date:from:to:cc:subject:message-id:references:mime-version :content-type:content-disposition:in-reply-to:user-agent; b=OC2Mp334gsaLDehJszgX4eilj7VmWKjWkv2Lugv4vSQ73s8Lh3oQ4jbpGngPkfl+NV t6hi4jBRpmpzQGwLfOBCTeJV+zN4WP7I7XPOSxxu3uWqj4xXhpXnM9qCiCLyFfx1bIyN xboqCwZ3FKgtJkBOHq3pQ+YIshNF/d/I1EJ4s= Received: by 10.213.89.146 with SMTP id e18mr62432ebm.87.1257213499615; Mon, 02 Nov 2009 17:58:19 -0800 (PST) Received: from me.localdomain (85.64.35.106.dynamic.barak-online.net [85.64.35.106]) by mx.google.com with ESMTPS id 5sm7392033eyf.9.2009.11.02.17.58.17 (version=TLSv1/SSLv3 cipher=RC4-MD5); Mon, 02 Nov 2009 17:58:18 -0800 (PST) Received: by me.localdomain (Postfix, from userid 1000) id AEC6811F87; Tue, 3 Nov 2009 04:00:28 +0200 (IST) Date: Tue, 3 Nov 2009 04:00:28 +0200 From: Sasha Khapyorsky To: Hal Rosenstock Cc: linux-rdma@vger.kernel.org Subject: Re: [PATCH] opensm/osm_trap_rcv.c: More minor reorg of trap_rcv_process_request Message-ID: <20091103020028.GB17404@me> References: <20091102115051.GA32233@comcast.net> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20091102115051.GA32233@comcast.net> User-Agent: Mutt/1.5.20 (2009-06-14) Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org diff --git a/opensm/opensm/osm_trap_rcv.c b/opensm/opensm/osm_trap_rcv.c index 0ee4e77..dae892e 100644 --- a/opensm/opensm/osm_trap_rcv.c +++ b/opensm/opensm/osm_trap_rcv.c @@ -226,7 +226,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p) uint8_t payload[IB_SMP_DATA_SIZE]; osm_madw_context_t context; ib_port_info_t *pi = (ib_port_info_t *)payload; - int ret; /* select the nearest port to master opensm */ if (p->p_remote_physp && @@ -236,10 +235,6 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p) /* If trap 131, might want to disable peer port if available */ /* but peer port has been observed not to respond to SM requests */ - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3810: " - "Disabling physical port 0x%016" PRIx64 " num:%u\n", - cl_ntoh64(osm_physp_get_port_guid(p)), p->port_num); - memcpy(payload, &p->port_info, sizeof(ib_port_info_t)); /* Set port to disabled/down */ @@ -253,15 +248,10 @@ static int disable_port(osm_sm_t *sm, osm_physp_t *p) context.pi_context.light_sweep = FALSE; context.pi_context.active_transition = FALSE; - ret = osm_req_set(sm, osm_physp_get_dr_path_ptr(p), - payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, - cl_hton32(osm_physp_get_port_num(p)), - CL_DISP_MSGID_NONE, &context); - if (ret) - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: " - "Request to set PortInfo failed\n"); - - return ret; + return osm_req_set(sm, osm_physp_get_dr_path_ptr(p), + payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, + cl_hton32(osm_physp_get_port_num(p)), + CL_DISP_MSGID_NONE, &context); } static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, @@ -301,6 +291,42 @@ static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, cl_ntoh16(source_lid), cl_ntoh64(trans_id)); } +static int shutup_noisy_port(osm_sm_t *sm, uint16_t lid, uint8_t port, + unsigned num) +{ + osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port); + if (!p) { + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: " + "Failed to find physical port by lid:%u num:%u\n", + lid, port); + return -1; + } + + /* When babbling port policy option is enabled and + Threshold for disabling a "babbling" port is exceeded */ + if (sm->p_subn->opt.babbling_port_policy && num >= 250) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Disabling noisy physical port 0x%016" PRIx64 + ": lid %u, num %u\n", + cl_ntoh64(osm_physp_get_port_guid(p)), lid, port); + if (disable_port(sm, p)) + OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: " + "Failed to disable.\n"); + else + return 1; + } + + /* check if the current state of the p_physp is healthy. If + it is - then this is a first change of state. Run a heavy sweep. */ + if (osm_physp_is_healthy(p)) { + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, + "Marking unhealthy physical port by lid:%u num:%u\n", + lid, port); + osm_physp_set_health(p, FALSE); + return 2; + } + return 0; +} /********************************************************************** **********************************************************************/ static void trap_rcv_process_request(IN osm_sm_t * sm, @@ -438,7 +464,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm, /* Now we know how many times it provided this trap */ if (num_received > 10) { if (print_num_received(num_received)) - OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3804: " + OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, "Received trap %u times consecutively\n", num_received); /* @@ -446,49 +472,17 @@ static void trap_rcv_process_request(IN osm_sm_t * sm, * we mark it as unhealthy. */ if (physp_change_trap == TRUE) { - /* get the port */ - p_physp = get_physp_by_lid_and_num(sm, - cl_ntoh16 - (source_lid), - port_num); - - if (!p_physp) - OSM_LOG(sm->p_log, OSM_LOG_ERROR, - "ERR 3805: " - "Failed to find physical port by lid:%u num:%u\n", - cl_ntoh16(source_lid), - port_num); - else { - /* When babbling port policy option is enabled and - Threshold for disabling a "babbling" port is exceeded */ - if (sm->p_subn->opt. - babbling_port_policy - && num_received >= 250 - && disable_port(sm, p_physp) == 0) - goto Exit; - - OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, - "Marking unhealthy physical port by lid:%u num:%u\n", - cl_ntoh16(source_lid), - port_num); - /* check if the current state of the p_physp is healthy. If - it is - then this is a first change of state. Run a heavy sweep. - if it is not - no need to mark it again - just restart the timer. */ - if (osm_physp_is_healthy(p_physp)) { - osm_physp_set_health(p_physp, - FALSE); - /* Make sure we sweep again - force a heavy sweep. */ - /* The sweep should be done only after the re-registration, or - else we'll be losing track of the timer. */ - run_heavy_sweep = TRUE; - } - /* If we are marking the port as unhealthy - we want to - keep this for a longer period of time than the - OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT. Use the - OSM_DEFAULT_UNHEALTHY_TIMEOUT */ - event_wheel_timeout = - OSM_DEFAULT_UNHEALTHY_TIMEOUT; - } + int ret = shutup_noisy_port(sm, + cl_ntoh16(source_lid), + port_num, + num_received); + if (ret == 1) /* port disabled */ + goto Exit; + else if (ret == 2) /* unhealthy - run sweep */ + run_heavy_sweep = TRUE; + /* in any case increase timeout interval */ + event_wheel_timeout = + OSM_DEFAULT_UNHEALTHY_TIMEOUT; } } @@ -508,8 +502,7 @@ static void trap_rcv_process_request(IN osm_sm_t * sm, if (num_received > 10 && run_heavy_sweep == FALSE) { if (print_num_received(num_received)) OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, - "Continuously received this trap %u times. Ignoring\n", - num_received); + "Ignoring noisy traps.\n"); goto Exit; } }