From patchwork Wed Aug 4 18:24:38 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Arlin Davis X-Patchwork-Id: 117112 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter.kernel.org (8.14.4/8.14.3) with ESMTP id o74IPG3h002316 for ; Wed, 4 Aug 2010 18:25:16 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758537Ab0HDSYk (ORCPT ); Wed, 4 Aug 2010 14:24:40 -0400 Received: from mga01.intel.com ([192.55.52.88]:15079 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1758552Ab0HDSYj convert rfc822-to-8bit (ORCPT ); Wed, 4 Aug 2010 14:24:39 -0400 Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga101.fm.intel.com with ESMTP; 04 Aug 2010 11:21:35 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.55,316,1278313200"; d="scan'208";a="824724738" Received: from orsmsx603.amr.corp.intel.com ([10.22.226.49]) by fmsmga001.fm.intel.com with ESMTP; 04 Aug 2010 11:24:24 -0700 Received: from orsmsx506.amr.corp.intel.com ([10.22.226.44]) by orsmsx603.amr.corp.intel.com ([10.22.226.49]) with mapi; Wed, 4 Aug 2010 11:24:38 -0700 From: "Davis, Arlin R" To: linux-rdma , "ofw@lists.openfabrics.org" Date: Wed, 4 Aug 2010 11:24:38 -0700 Subject: [PATCH] dapl-1.2: common, cma: disconnect and cleanup CR linkings after DTO error on EP Thread-Topic: [PATCH] dapl-1.2: common, cma: disconnect and cleanup CR linkings after DTO error on EP Thread-Index: AcsNeGIZrKpDLe9TRUW5U9YZUm3b7gk+JRTQ Message-ID: Accept-Language: en-US Content-Language: en-US X-MS-Has-Attach: X-MS-TNEF-Correlator: acceptlanguage: en-US MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Wed, 04 Aug 2010 18:25:18 +0000 (UTC) diff --git 
a/dapl/common/dapl_evd_util.c b/dapl/common/dapl_evd_util.c index 8ea2ce8..e3655fb 100644 --- a/dapl/common/dapl_evd_util.c +++ b/dapl/common/dapl_evd_util.c @@ -1169,11 +1169,41 @@ dapli_evd_cqe_to_event ( * Most error DTO ops result in disconnecting the EP. See * IBTA Vol 1.1, Chapter 10,Table 68, for expected effect on * state. The QP going to error state will trigger disconnect - * at provider level. No need to force disconnect here. Just - * print error log. + * at provider level. QP errors and CM events are independent, + * so issue CM disconnect and clean up any pending CRs */ if ((dto_status != DAT_DTO_SUCCESS) && (dto_status != DAT_DTO_ERR_FLUSHED)) { + dapl_os_lock ( &ep_ptr->header.lock ); + if (ep_ptr->param.ep_state == DAT_EP_STATE_CONNECTED || + ep_ptr->param.ep_state == DAT_EP_STATE_ACTIVE_CONNECTION_PENDING || + ep_ptr->param.ep_state == DAT_EP_STATE_PASSIVE_CONNECTION_PENDING|| + ep_ptr->param.ep_state == DAT_EP_STATE_COMPLETION_PENDING ) + { + ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED; + dapl_os_unlock ( &ep_ptr->header.lock ); + dapls_io_trc_dump (ep_ptr, cqe_ptr, dto_status); + + /* Let the other side know we have disconnected */ + (void) dapls_ib_disconnect (ep_ptr, DAT_CLOSE_ABRUPT_FLAG); + + /* ... 
and clean up the local side */ + evd_ptr = (DAPL_EVD *) ep_ptr->param.connect_evd_handle; + dapl_sp_remove_ep(ep_ptr); + if (evd_ptr != NULL) + { + dapls_evd_post_connection_event (evd_ptr, + DAT_CONNECTION_EVENT_BROKEN, + (DAT_HANDLE) ep_ptr, + 0, + 0); + } + } + else + { + dapl_os_unlock ( &ep_ptr->header.lock ); + } + dapl_log(DAPL_DBG_TYPE_ERR, "DTO completion ERR: status %d, op %s, vendor_err 0x%x - %s\n", DAPL_GET_CQE_STATUS(cqe_ptr), diff --git a/dapl/common/dapl_sp_util.c b/dapl/common/dapl_sp_util.c index 1ca1204..310e601 100644 --- a/dapl/common/dapl_sp_util.c +++ b/dapl/common/dapl_sp_util.c @@ -290,6 +290,8 @@ dapl_sp_remove_ep ( dapl_os_unlock (&sp_ptr->header.lock); + ep_ptr->cr_ptr = NULL; + /* free memory outside of the lock */ dapls_cr_free (cr_ptr); diff --git a/dapl/openib_cma/dapl_ib_cm.c b/dapl/openib_cma/dapl_ib_cm.c index 576e19e..8332d46 100755 --- a/dapl/openib_cma/dapl_ib_cm.c +++ b/dapl/openib_cma/dapl_ib_cm.c @@ -610,21 +610,12 @@ dapls_ib_disconnect(IN DAPL_EP *ep_ptr, dapl_dbg_log(DAPL_DBG_TYPE_ERR, " disconnect: ID %p ret %d\n", ep_ptr->cm_handle, ret); - - /* ABRUPT close, wait for callback and !DISCONNECT_PENDING state */ - if (close_flags == DAT_CLOSE_ABRUPT_FLAG) { - dapl_os_lock(&ep_ptr->header.lock); - while (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECT_PENDING) { - dapl_os_unlock(&ep_ptr->header.lock); - dapl_os_sleep_usec(10000); - dapl_os_lock(&ep_ptr->header.lock); - } - dapl_os_unlock(&ep_ptr->header.lock); - } - /* * DAT event notification occurs from the callback - * Note: will fire even if DREQ goes unanswered on timeout + * Don't wait for event, allow consumer option to + * give up and destroy cm_id if event is delayed. + * EP DISCONNECTED state protects against duplicate + * events being queued. */ return DAT_SUCCESS; }