From patchwork Tue May 11 08:22:05 2021
X-Patchwork-Submitter: Leon Romanovsky
X-Patchwork-Id: 12250173
X-Patchwork-Delegate: jgg@ziepe.ca
From: Leon Romanovsky
To: Doug Ledford, Jason Gunthorpe
Cc: linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, Mark Zhang, Sean Hefty
Subject: [PATCH rdma-next v3 1/8] IB/cm: Pair cm_alloc_response_msg() with a cm_free_response_msg()
Date: Tue, 11 May 2021 11:22:05 +0300
Message-Id: <598e654b67ebdfaa52c6659113d360fabec7a5e8.1620720467.git.leonro@nvidia.com>
X-Mailing-List: linux-rdma@vger.kernel.org

From: Jason Gunthorpe

This is not a functional change, but it helps make the purpose of all the
cm_free_msg() calls clearer. In this case a response msg has a NULL
context[0] and is never placed in cm_id_priv->msg.
Signed-off-by: Jason Gunthorpe Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0ead0d223154..6e20ba5d32e1 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -413,7 +413,7 @@ static int cm_alloc_response_msg(struct cm_port *port, ret = cm_create_response_msg_ah(port, mad_recv_wc, m); if (ret) { - cm_free_msg(m); + ib_free_send_mad(m); return ret; } @@ -421,6 +421,13 @@ static int cm_alloc_response_msg(struct cm_port *port, return 0; } +static void cm_free_response_msg(struct ib_mad_send_buf *msg) +{ + if (msg->ah) + rdma_destroy_ah(msg->ah, 0); + ib_free_send_mad(msg); +} + static void *cm_copy_private_data(const void *private_data, u8 private_data_len) { void *data; @@ -1569,16 +1576,16 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); ret = ib_post_send_mad(cm_id_priv->msg, NULL); if (ret) { + cm_free_msg(cm_id_priv->msg); spin_unlock_irqrestore(&cm_id_priv->lock, flags); - goto error2; + goto out; } BUG_ON(cm_id->state != IB_CM_IDLE); cm_id->state = IB_CM_REQ_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; - -error2: cm_free_msg(cm_id_priv->msg); -out: return ret; +out: + return ret; } EXPORT_SYMBOL(ib_send_cm_req); @@ -1618,7 +1625,7 @@ static int cm_issue_rej(struct cm_port *port, IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) - cm_free_msg(msg); + cm_free_response_msg(msg); return ret; } @@ -1974,7 +1981,7 @@ static void cm_dup_req_handler(struct cm_work *work, return; unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_msg(msg); +free: cm_free_response_msg(msg); } static struct cm_id_private *cm_match_req(struct cm_work *work, @@ -2453,7 +2460,7 @@ static void cm_dup_rep_handler(struct cm_work *work) goto deref; unlock: spin_unlock_irq(&cm_id_priv->lock); -free: cm_free_msg(msg); +free: cm_free_response_msg(msg); deref: cm_deref_id(cm_id_priv); } @@ -2794,7 +2801,7 @@ static int cm_issue_drep(struct cm_port *port, IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) - cm_free_msg(msg); + cm_free_response_msg(msg); return ret; } @@ -2853,7 +2860,7 @@ static int cm_dreq_handler(struct cm_work *work) if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || ib_post_send_mad(msg, NULL)) - cm_free_msg(msg); + cm_free_response_msg(msg); goto deref; case IB_CM_DREQ_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. @@ -3343,7 +3350,7 @@ static int cm_lap_handler(struct cm_work *work) if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) || ib_post_send_mad(msg, NULL)) - cm_free_msg(msg); + cm_free_response_msg(msg); goto deref; case IB_CM_LAP_RCVD: atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. 
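A minimal sketch of the pairing this patch establishes, assuming a hypothetical caller; the function name and the elided formatting step are not taken from the patch:

static int example_issue_response(struct cm_port *port,
				  struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_mad_send_buf *msg;
	int ret;

	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
	if (ret)
		return ret;

	/* ... format the reply MAD in msg->mad ... */

	/* Response msgs are released with cm_free_response_msg(), never
	 * cm_free_msg(): their context[0] is NULL and they are never
	 * stored in cm_id_priv->msg.
	 */
	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		cm_free_response_msg(msg);
	return ret;
}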
From patchwork Tue May 11 08:22:06 2021
X-Patchwork-Submitter: Leon Romanovsky
X-Patchwork-Id: 12250167
X-Patchwork-Delegate: jgg@ziepe.ca
From: Leon Romanovsky
To: Doug Ledford, Jason Gunthorpe
Cc: linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, Mark Zhang, Sean Hefty
Subject: [PATCH rdma-next v3 2/8] IB/cm: Split cm_alloc_msg()
Date: Tue, 11 May 2021 11:22:06 +0300
Message-Id: <88179e5e4b87f9a989b9e954df1b758bf666478a.1620720467.git.leonro@nvidia.com>

From: Jason Gunthorpe

cm_alloc_msg() is used by two quite different flows: one attaches the
message to the cm_id_priv and the other does not. Ensure the message
attach is consistently done under the spinlock, and ensure that the
free-on-error path always detaches the message from the cm_id_priv,
also under the lock. This makes reads and writes of cm_id_priv->msg
consistently locked, and the pointer is consistently NULL'd when the
message is freed, even in all error paths.
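The calling pattern this enforces, as a rough sketch simplified from the diff that follows; example_send() is not a real function in the driver:

static int example_send(struct cm_id_private *cm_id_priv)
{
	struct ib_mad_send_buf *msg;
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	msg = cm_alloc_priv_msg(cm_id_priv);	/* attaches msg to cm_id_priv->msg */
	if (IS_ERR(msg)) {
		ret = PTR_ERR(msg);
		goto out_unlock;
	}

	/* ... format the MAD in msg->mad ... */

	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		cm_free_priv_msg(msg);	/* detaches cm_id_priv->msg, still under the lock */
out_unlock:
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}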
Signed-off-by: Jason Gunthorpe Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 190 +++++++++++++++++++++-------------- 1 file changed, 115 insertions(+), 75 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 6e20ba5d32e1..94613275edcc 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -305,8 +305,7 @@ static inline void cm_deref_id(struct cm_id_private *cm_id_priv) complete(&cm_id_priv->comp); } -static int cm_alloc_msg(struct cm_id_private *cm_id_priv, - struct ib_mad_send_buf **msg) +static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) { struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; @@ -359,12 +358,42 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, m->retries = cm_id_priv->max_cm_retries; refcount_inc(&cm_id_priv->refcount); + spin_unlock_irqrestore(&cm.state_lock, flags2); m->context[0] = cm_id_priv; - *msg = m; + return m; out: spin_unlock_irqrestore(&cm.state_lock, flags2); - return ret; + return ERR_PTR(ret); +} + +static struct ib_mad_send_buf * +cm_alloc_priv_msg(struct cm_id_private *cm_id_priv) +{ + struct ib_mad_send_buf *msg; + + lockdep_assert_held(&cm_id_priv->lock); + + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return msg; + cm_id_priv->msg = msg; + return msg; +} + +static void cm_free_priv_msg(struct ib_mad_send_buf *msg) +{ + struct cm_id_private *cm_id_priv = msg->context[0]; + + lockdep_assert_held(&cm_id_priv->lock); + + if (!WARN_ON(cm_id_priv->msg != msg)) + cm_id_priv->msg = NULL; + + if (msg->ah) + rdma_destroy_ah(msg->ah, 0); + cm_deref_id(cm_id_priv); + ib_free_send_mad(msg); } static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port, @@ -1508,6 +1537,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { struct cm_id_private *cm_id_priv; + struct ib_mad_send_buf *msg; struct cm_req_msg *req_msg; unsigned long flags; int ret; @@ -1559,31 +1589,34 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->pkey = param->primary_path->pkey; cm_id_priv->qp_type = param->qp_type; - ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg); - if (ret) - goto out; + spin_lock_irqsave(&cm_id_priv->lock, flags); + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto out_unlock; + } - req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad; + req_msg = (struct cm_req_msg *)msg->mad; cm_format_req(req_msg, cm_id_priv, param); cm_id_priv->tid = req_msg->hdr.tid; - cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms; - cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT; + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *)(unsigned long)IB_CM_REQ_SENT; cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); trace_icm_send_req(&cm_id_priv->id); - spin_lock_irqsave(&cm_id_priv->lock, flags); - ret = ib_post_send_mad(cm_id_priv->msg, NULL); - if (ret) { - cm_free_msg(cm_id_priv->msg); - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - goto out; - } + ret = ib_post_send_mad(msg, NULL); + if (ret) + goto out_free; BUG_ON(cm_id->state != IB_CM_IDLE); cm_id->state = IB_CM_REQ_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; +out_free: + cm_free_priv_msg(msg); +out_unlock: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); out: return ret; } @@ -2290,9 +2323,11 @@ int ib_send_cm_rep(struct 
ib_cm_id *cm_id, goto out; } - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); goto out; + } rep_msg = (struct cm_rep_msg *) msg->mad; cm_format_rep(rep_msg, cm_id_priv, param); @@ -2301,14 +2336,10 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, trace_icm_send_rep(cm_id); ret = ib_post_send_mad(msg, NULL); - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - return ret; - } + if (ret) + goto out_free; cm_id->state = IB_CM_REP_SENT; - cm_id_priv->msg = msg; cm_id_priv->initiator_depth = param->initiator_depth; cm_id_priv->responder_resources = param->responder_resources; cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg)); @@ -2316,8 +2347,13 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, "IBTA declares QPN to be 24 bits, but it is 0x%X\n", param->qp_num); cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return 0; -out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); +out_free: + cm_free_priv_msg(msg); +out: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_rep); @@ -2364,9 +2400,11 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, goto error; } - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); goto error; + } cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); @@ -2664,10 +2702,10 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) { + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { cm_enter_timewait(cm_id_priv); - return ret; + return PTR_ERR(msg); } cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, @@ -2679,12 +2717,11 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); - cm_free_msg(msg); + cm_free_priv_msg(msg); return ret; } cm_id_priv->id.state = IB_CM_DREQ_SENT; - cm_id_priv->msg = msg; return 0; } @@ -2739,9 +2776,9 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv, cm_set_private_data(cm_id_priv, private_data, private_data_len); cm_enter_timewait(cm_id_priv); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); @@ -2934,9 +2971,9 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: cm_reset_to_idle(cm_id_priv); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len, state); @@ -2944,9 +2981,9 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: cm_enter_timewait(cm_id_priv); - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason, ari, ari_length, private_data, private_data_len, state); @@ 
-3124,13 +3161,15 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, default: trace_icm_send_mra_unknown_err(&cm_id_priv->id); ret = -EINVAL; - goto error1; + goto error_unlock; } if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) { - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto error1; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto error_unlock; + } cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, msg_response, service_timeout, @@ -3138,7 +3177,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, trace_icm_send_mra(cm_id); ret = ib_post_send_mad(msg, NULL); if (ret) - goto error2; + goto error_free_msg; } cm_id->state = cm_state; @@ -3148,13 +3187,11 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, spin_unlock_irqrestore(&cm_id_priv->lock, flags); return 0; -error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); - return ret; - -error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags); - kfree(data); +error_free_msg: cm_free_msg(msg); +error_unlock: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + kfree(data); return ret; } EXPORT_SYMBOL(ib_send_cm_mra); @@ -3490,38 +3527,41 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, &cm_id_priv->av, cm_id_priv); if (ret) - goto out; + return ret; cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - goto out; - - cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv, - param); - msg->timeout_ms = cm_id_priv->timeout_ms; - msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state == IB_CM_IDLE) { - trace_icm_send_sidr_req(&cm_id_priv->id); - ret = ib_post_send_mad(msg, NULL); - } else { + if (cm_id->state != IB_CM_IDLE) { ret = -EINVAL; + goto out_unlock; } - if (ret) { - spin_unlock_irqrestore(&cm_id_priv->lock, flags); - cm_free_msg(msg); - goto out; + msg = cm_alloc_priv_msg(cm_id_priv); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto out_unlock; } + + cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv, + param); + msg->timeout_ms = cm_id_priv->timeout_ms; + msg->context[1] = (void *)(unsigned long)IB_CM_SIDR_REQ_SENT; + + trace_icm_send_sidr_req(&cm_id_priv->id); + ret = ib_post_send_mad(msg, NULL); + if (ret) + goto out_free; cm_id->state = IB_CM_SIDR_REQ_SENT; - cm_id_priv->msg = msg; spin_unlock_irqrestore(&cm_id_priv->lock, flags); -out: + return 0; +out_free: + cm_free_priv_msg(msg); +out_unlock: + spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(ib_send_cm_sidr_req); @@ -3668,9 +3708,9 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD) return -EINVAL; - ret = cm_alloc_msg(cm_id_priv, &msg); - if (ret) - return ret; + msg = cm_alloc_msg(cm_id_priv); + if (IS_ERR(msg)) + return PTR_ERR(msg); cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); From patchwork Tue May 11 08:22:07 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12250175 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.7 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, 
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4377AC43611 for ; Tue, 11 May 2021 08:31:40 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 12764611C2 for ; Tue, 11 May 2021 08:31:40 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231162AbhEKIcn (ORCPT ); Tue, 11 May 2021 04:32:43 -0400 Received: from mail.kernel.org ([198.145.29.99]:42602 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230333AbhEKIcj (ORCPT ); Tue, 11 May 2021 04:32:39 -0400 Received: by mail.kernel.org (Postfix) with ESMTPSA id 239A961928; Tue, 11 May 2021 08:22:35 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1620721356; bh=9BZAAzBXUsFnxzBksVs8V35KTMzLRNT1T8uiN0llGBY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=H7Kf27d7iss6/EhpRGz72Q3VdPMJlJvC/jIZseQo7Yu3hyYKLFy0+uz+ZM2jsIfpc ovfaC9tuciJyEWCk+aZu1e1qP9kza6Bo9gbvpVel6vYCxXMxCuoW2PTPCY+EQTr6ov DzWAFJr9kXYd/ZToDQzmPG7QLAmZRz08rW8NPGxojw4zVUVMQxqsVlVl1Tq5TwIpKi 6UYF4Pdzw1Wff0CYX3oEdgGDIBuq0sewvolAoiBEu7Lprp6buZ4YtevaHIFiROL4em P+tHhMYGT7HMg6abd47luIWOSMImGq8lMjk93QaoLPh9CZyahLx6KSpQ3IDSpGDqo7 mYm62fqCOYGJA== From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, Mark Zhang , Sean Hefty Subject: [PATCH rdma-next v3 3/8] IB/cm: Call the correct message free functions in cm_send_handler() Date: Tue, 11 May 2021 11:22:07 +0300 Message-Id: <2b7b09d9c9f53a5c7c3695919828234e9f873fe4.1620720467.git.leonro@nvidia.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Jason Gunthorpe There are now three destroy functions for the cm_msg, and all places except the general send completion handler use the correct function. Fix cm_send_handler() to detect which kind of message is being completed and destroy it using the correct function with the correct locking. Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 52 +++++++++++++++++------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 94613275edcc..8dbc39ea4612 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3795,22 +3795,26 @@ static int cm_sidr_rep_handler(struct cm_work *work) return -EINVAL; } -static void cm_process_send_error(struct ib_mad_send_buf *msg, +static void cm_process_send_error(struct cm_id_private *cm_id_priv, + struct ib_mad_send_buf *msg, + enum ib_cm_state state, enum ib_wc_status wc_status) { - struct cm_id_private *cm_id_priv; - struct ib_cm_event cm_event; - enum ib_cm_state state; + struct ib_cm_event cm_event = {}; int ret; - memset(&cm_event, 0, sizeof cm_event); - cm_id_priv = msg->context[0]; - /* Discard old sends or ones without a response. 
*/ spin_lock_irq(&cm_id_priv->lock); - state = (enum ib_cm_state) (unsigned long) msg->context[1]; - if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) - goto discard; + if (msg != cm_id_priv->msg) { + spin_unlock_irq(&cm_id_priv->lock); + cm_free_msg(msg); + return; + } + cm_free_priv_msg(msg); + + if (state != cm_id_priv->id.state || wc_status == IB_WC_SUCCESS || + wc_status == IB_WC_WR_FLUSH_ERR) + goto out_unlock; trace_icm_mad_send_err(state, wc_status); switch (state) { @@ -3833,26 +3837,27 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, cm_event.event = IB_CM_SIDR_REQ_ERROR; break; default: - goto discard; + goto out_unlock; } spin_unlock_irq(&cm_id_priv->lock); cm_event.param.send_status = wc_status; /* No other events can occur on the cm_id at this point. */ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event); - cm_free_msg(msg); if (ret) ib_destroy_cm_id(&cm_id_priv->id); return; -discard: +out_unlock: spin_unlock_irq(&cm_id_priv->lock); - cm_free_msg(msg); } static void cm_send_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_mad_send_buf *msg = mad_send_wc->send_buf; + struct cm_id_private *cm_id_priv = msg->context[0]; + enum ib_cm_state state = + (enum ib_cm_state)(unsigned long)msg->context[1]; struct cm_port *port; u16 attr_index; @@ -3865,7 +3870,7 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, * set to a cm_id), and is not a REJ, then it is a send that was * manually retried. */ - if (!msg->context[0] && (attr_index != CM_REJ_COUNTER)) + if (!cm_id_priv && (attr_index != CM_REJ_COUNTER)) msg->retries = 1; atomic_long_add(1 + msg->retries, @@ -3875,18 +3880,11 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent, &port->counter_group[CM_XMIT_RETRIES]. 
			counter[attr_index]);

-	switch (mad_send_wc->status) {
-	case IB_WC_SUCCESS:
-	case IB_WC_WR_FLUSH_ERR:
-		cm_free_msg(msg);
-		break;
-	default:
-		if (msg->context[0] && msg->context[1])
-			cm_process_send_error(msg, mad_send_wc->status);
-		else
-			cm_free_msg(msg);
-		break;
-	}
+	if (cm_id_priv)
+		cm_process_send_error(cm_id_priv, msg, state,
+				      mad_send_wc->status);
+	else
+		cm_free_response_msg(msg);
 }

 static void cm_work_handler(struct work_struct *_work)

From patchwork Tue May 11 08:22:08 2021
X-Patchwork-Submitter: Leon Romanovsky
X-Patchwork-Id: 12250181
X-Patchwork-Delegate: jgg@ziepe.ca
From: Leon Romanovsky
To: Doug Ledford, Jason Gunthorpe
Cc: linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, Mark Zhang, Sean Hefty
Subject: [PATCH rdma-next v3 4/8] IB/cm: Tidy remaining cm_msg free paths
Date: Tue, 11 May 2021 11:22:08 +0300
Message-Id: <07c079c8cab2c1276aeac4d8e925591b28e4d37c.1620720467.git.leonro@nvidia.com>

From: Jason Gunthorpe

Now that all the free paths are explicit, cm_free_msg() is only called for
msgs allocated with cm_alloc_msg(), so we can assume the context is set.
Place it after the allocation function it is paired with, for clarity.

Also remove a bogus NULL assignment in one place after a cancel. It did
nothing except prevent the send completion from becoming an event, and
changing the state already does that.
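After this and the preceding patches, the allocator/free helpers pair up as follows (summary only, not code from the patch):

/*
 *   cm_alloc_msg()                -> cm_free_msg()
 *      (context[0] is always a valid cm_id_priv)
 *   cm_alloc_priv_msg()           -> cm_free_priv_msg()
 *      (additionally attached to cm_id_priv->msg, both under cm_id_priv->lock)
 *   cm_alloc_response_msg() /
 *   cm_alloc_response_msg_no_ah() -> cm_free_response_msg()
 *      (context[0] is NULL, never stored in cm_id_priv->msg)
 */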
Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 8dbc39ea4612..1f0bc31ca0e2 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -367,6 +367,16 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) return ERR_PTR(ret); } +static void cm_free_msg(struct ib_mad_send_buf *msg) +{ + struct cm_id_private *cm_id_priv = msg->context[0]; + + if (msg->ah) + rdma_destroy_ah(msg->ah, 0); + cm_deref_id(cm_id_priv); + ib_free_send_mad(msg); +} + static struct ib_mad_send_buf * cm_alloc_priv_msg(struct cm_id_private *cm_id_priv) { @@ -420,15 +430,6 @@ static int cm_create_response_msg_ah(struct cm_port *port, return 0; } -static void cm_free_msg(struct ib_mad_send_buf *msg) -{ - if (msg->ah) - rdma_destroy_ah(msg->ah, 0); - if (msg->context[0]) - cm_deref_id(msg->context[0]); - ib_free_send_mad(msg); -} - static int cm_alloc_response_msg(struct cm_port *port, struct ib_mad_recv_wc *mad_recv_wc, struct ib_mad_send_buf **msg) @@ -3455,7 +3456,6 @@ static int cm_apr_handler(struct cm_work *work) } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); - cm_id_priv->msg = NULL; cm_queue_work_unlock(cm_id_priv, work); return 0; out: From patchwork Tue May 11 08:22:09 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12250179 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.7 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id D9382C43616 for ; Tue, 11 May 2021 08:31:41 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id AE2386128C for ; Tue, 11 May 2021 08:31:41 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231204AbhEKIcq (ORCPT ); Tue, 11 May 2021 04:32:46 -0400 Received: from mail.kernel.org ([198.145.29.99]:42626 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230492AbhEKIcj (ORCPT ); Tue, 11 May 2021 04:32:39 -0400 Received: by mail.kernel.org (Postfix) with ESMTPSA id 7021161934; Tue, 11 May 2021 08:22:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1620721372; bh=eG632swNww8pxyqsnuhpgUmrkkfwdI68ow+BvuIoZSw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=boucnISNE+OJUw54nLTDtGItgmNXUy3BLp/7DQcZfU4AATy3QFYlGLT7ANU62CaKA RSptcrTkvuSbp7tuEp6PGtnDjlhpU+HfRINStX66f39Vpc0mBXyFVlJNMEL3UeS5+9 184c7KEoIuIfHMFPz5W+Wp57x5iSolfzWSuS62afzokCeA636jNwxyT8AHgVuP7vyM SUU0VqkOK5b7Ta18nq/lRyjvmKbxIrJ7GNN/D7BxEsZ+DTqagQo6Ppr/bHtuJyAUhj V5F7bWDxhL6iy1JheHKMmNq4FU2FszenHtZXnxgrmNooli2mFtfYfm4YDriWga93FQ +FIIn79Wxwvgw== From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: Mark Zhang , linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, Sean Hefty Subject: [PATCH rdma-next v3 
5/8] Revert "IB/cm: Mark stale CM id's whenever the mad agent was unregistered" Date: Tue, 11 May 2021 11:22:09 +0300 Message-Id: <6f918ccad93ad98df5c1f50953d97a3e6fc87068.1620720467.git.leonro@nvidia.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Mark Zhang This reverts commit 9db0ff53cb9b43ed75bacd42a89c1a0ab048b2b0, which wasn't full and still causes to the following panic: panic @ time 1605623870.843, thread 0xfffffeb63b552000: vm_fault_lookup: fault on nofault entry, addr: 0xfffffe811a94e000 time = 1605623870 cpuid = 9, TSC = 0xb7937acc1b6 Panic occurred in module kernel loaded at 0xffffffff80200000:Stack: -------------------------------------------------- kernel:vm_fault+0x19da kernel:vm_fault_trap+0x6e kernel:trap_pfault+0x1f1 kernel:trap+0x31e kernel:cm_destroy_id+0x38c kernel:rdma_destroy_id+0x127 kernel:sdp_shutdown_task+0x3ae kernel:taskqueue_run_locked+0x10b kernel:taskqueue_thread_loop+0x87 kernel:fork_exit+0x83 Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 123 ++++------------------------------- 1 file changed, 14 insertions(+), 109 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1f0bc31ca0e2..8a7ac605fded 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -121,8 +121,6 @@ static struct ib_cm { __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; - /* Sync on cm change port state */ - spinlock_t state_lock; } cm; /* Counter indexes ordered by attribute ID */ @@ -203,8 +201,6 @@ struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; u32 port_num; - struct list_head cm_priv_prim_list; - struct list_head cm_priv_altr_list; struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; @@ -285,12 +281,6 @@ struct cm_id_private { u8 service_timeout; u8 target_ack_delay; - struct list_head prim_list; - struct list_head altr_list; - /* Indicates that the send port mad is registered and av is set */ - int prim_send_port_not_ready; - int altr_send_port_not_ready; - struct list_head work_list; atomic_t work_count; @@ -310,47 +300,20 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; - struct cm_av *av; - unsigned long flags, flags2; - int ret = 0; - /* don't let the port to be released till the agent is down */ - spin_lock_irqsave(&cm.state_lock, flags2); - spin_lock_irqsave(&cm.lock, flags); - if (!cm_id_priv->prim_send_port_not_ready) - av = &cm_id_priv->av; - else if (!cm_id_priv->altr_send_port_not_ready && - (cm_id_priv->alt_av.port)) - av = &cm_id_priv->alt_av; - else { - pr_info("%s: not valid CM id\n", __func__); - ret = -ENODEV; - spin_unlock_irqrestore(&cm.lock, flags); - goto out; - } - spin_unlock_irqrestore(&cm.lock, flags); - /* Make sure the port haven't released the mad yet */ mad_agent = cm_id_priv->av.port->mad_agent; - if (!mad_agent) { - pr_info("%s: not a valid MAD agent\n", __func__); - ret = -ENODEV; - goto out; - } - ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0); - if (IS_ERR(ah)) { - ret = PTR_ERR(ah); - goto out; - } + ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0); + if (IS_ERR(ah)) + return (void *)ah; m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, - av->pkey_index, + cm_id_priv->av.pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, 
GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { rdma_destroy_ah(ah, 0); - ret = PTR_ERR(m); - goto out; + return m; } /* Timeout set by caller if response is expected. */ @@ -358,13 +321,8 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) m->retries = cm_id_priv->max_cm_retries; refcount_inc(&cm_id_priv->refcount); - spin_unlock_irqrestore(&cm.state_lock, flags2); m->context[0] = cm_id_priv; return m; - -out: - spin_unlock_irqrestore(&cm.state_lock, flags2); - return ERR_PTR(ret); } static void cm_free_msg(struct ib_mad_send_buf *msg) @@ -518,21 +476,6 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } -static void add_cm_id_to_port_list(struct cm_id_private *cm_id_priv, - struct cm_av *av, struct cm_port *port) -{ - unsigned long flags; - - spin_lock_irqsave(&cm.lock, flags); - if (&cm_id_priv->av == av) - list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); - else if (&cm_id_priv->alt_av == av) - list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); - else - WARN_ON(true); - spin_unlock_irqrestore(&cm.lock, flags); -} - static struct cm_port * get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) { @@ -576,8 +519,7 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) static int cm_init_av_by_path(struct sa_path_rec *path, const struct ib_gid_attr *sgid_attr, - struct cm_av *av, - struct cm_id_private *cm_id_priv) + struct cm_av *av) { struct rdma_ah_attr new_ah_attr; struct cm_device *cm_dev; @@ -611,7 +553,6 @@ static int cm_init_av_by_path(struct sa_path_rec *path, return ret; av->timeout = path->packet_life_time + 1; - add_cm_id_to_port_list(cm_id_priv, av, port); rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); return 0; } @@ -891,8 +832,6 @@ static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device, spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); - INIT_LIST_HEAD(&cm_id_priv->prim_list); - INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); refcount_set(&cm_id_priv->refcount, 1); @@ -1193,12 +1132,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) kfree(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; } - if (!list_empty(&cm_id_priv->altr_list) && - (!cm_id_priv->altr_send_port_not_ready)) - list_del(&cm_id_priv->altr_list); - if (!list_empty(&cm_id_priv->prim_list) && - (!cm_id_priv->prim_send_port_not_ready)) - list_del(&cm_id_priv->prim_list); + WARN_ON(cm_id_priv->listen_sharecount); WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node)); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) @@ -1566,13 +1500,12 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, } ret = cm_init_av_by_path(param->primary_path, - param->ppath_sgid_attr, &cm_id_priv->av, - cm_id_priv); + param->ppath_sgid_attr, &cm_id_priv->av); if (ret) goto out; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, NULL, - &cm_id_priv->alt_av, cm_id_priv); + &cm_id_priv->alt_av); if (ret) goto out; } @@ -2204,8 +2137,7 @@ static int cm_req_handler(struct cm_work *work) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); work->path[0].hop_limit = grh->hop_limit; - ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av, - cm_id_priv); + ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av); if (ret) { int err; @@ -2224,7 +2156,7 @@ static int cm_req_handler(struct cm_work *work) } 
if (cm_req_has_alt_path(req_msg)) { ret = cm_init_av_by_path(&work->path[1], NULL, - &cm_id_priv->alt_av, cm_id_priv); + &cm_id_priv->alt_av); if (ret) { ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_ALT_GID, @@ -3405,7 +3337,7 @@ static int cm_lap_handler(struct cm_work *work) goto unlock; ret = cm_init_av_by_path(param->alternate_path, NULL, - &cm_id_priv->alt_av, cm_id_priv); + &cm_id_priv->alt_av); if (ret) goto unlock; @@ -3524,8 +3456,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, cm_id_priv = container_of(cm_id, struct cm_id_private, id); ret = cm_init_av_by_path(param->path, param->sgid_attr, - &cm_id_priv->av, - cm_id_priv); + &cm_id_priv->av); if (ret) return ret; @@ -4008,9 +3939,7 @@ static int cm_establish(struct ib_cm_id *cm_id) static int cm_migrate(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; - struct cm_av tmp_av; unsigned long flags; - int tmp_send_port_not_ready; int ret = 0; cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -4019,14 +3948,7 @@ static int cm_migrate(struct ib_cm_id *cm_id) (cm_id->lap_state == IB_CM_LAP_UNINIT || cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state = IB_CM_LAP_IDLE; - /* Swap address vector */ - tmp_av = cm_id_priv->av; cm_id_priv->av = cm_id_priv->alt_av; - cm_id_priv->alt_av = tmp_av; - /* Swap port send ready state */ - tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; - cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; - cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; } else ret = -EINVAL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -4401,9 +4323,6 @@ static int cm_add_one(struct ib_device *ib_device) port->cm_dev = cm_dev; port->port_num = i; - INIT_LIST_HEAD(&port->cm_priv_prim_list); - INIT_LIST_HEAD(&port->cm_priv_altr_list); - ret = cm_create_port_fs(port); if (ret) goto error1; @@ -4467,8 +4386,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) { struct cm_device *cm_dev = client_data; struct cm_port *port; - struct cm_id_private *cm_id_priv; - struct ib_mad_agent *cur_mad_agent; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP }; @@ -4489,24 +4406,13 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); - /* Mark all the cm_id's as not valid */ - spin_lock_irq(&cm.lock); - list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) - cm_id_priv->altr_send_port_not_ready = 1; - list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) - cm_id_priv->prim_send_port_not_ready = 1; - spin_unlock_irq(&cm.lock); /* * We flush the queue here after the going_down set, this * verify that no new works will be queued in the recv handler, * after that we can call the unregister_mad_agent */ flush_workqueue(cm.wq); - spin_lock_irq(&cm.state_lock); - cur_mad_agent = port->mad_agent; - port->mad_agent = NULL; - spin_unlock_irq(&cm.state_lock); - ib_unregister_mad_agent(cur_mad_agent); + ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); kfree(port); } @@ -4521,7 +4427,6 @@ static int __init ib_cm_init(void) INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); - spin_lock_init(&cm.state_lock); cm.listen_service_table = RB_ROOT; cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; From patchwork Tue May 11 08:22:10 2021 Content-Type: text/plain; charset="utf-8" 
X-Patchwork-Submitter: Leon Romanovsky
X-Patchwork-Id: 12250177
X-Patchwork-Delegate: jgg@ziepe.ca
From: Leon Romanovsky
To: Doug Ledford, Jason Gunthorpe
Cc: Mark Zhang, linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, Sean Hefty
Subject: [PATCH rdma-next v3 6/8] IB/cm: Simplify ib_cancel_mad() and ib_modify_mad() calls
Date: Tue, 11 May 2021 11:22:10 +0300

From: Mark Zhang

The mad_agent parameter is redundant, since struct ib_mad_send_buf already
carries a pointer to it.
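From the caller's point of view the change looks like this (taken from the hunks below):

	/* Before: the mad_agent had to be passed explicitly. */
	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
	ib_modify_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg, timeout);

	/* After: the MAD core takes the agent from send_buf->mad_agent. */
	ib_cancel_mad(cm_id_priv->msg);
	ib_modify_mad(cm_id_priv->msg, timeout);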
Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 42 ++++++++++++++---------------- drivers/infiniband/core/mad.c | 17 +++++------- drivers/infiniband/core/sa_query.c | 4 +-- include/rdma/ib_mad.h | 27 +++++++++---------- 4 files changed, 39 insertions(+), 51 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 8a7ac605fded..2d62c90f9790 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1058,7 +1058,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) break; case IB_CM_SIDR_REQ_SENT: cm_id->state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_SIDR_REQ_RCVD: cm_send_sidr_rep_locked(cm_id_priv, @@ -1069,7 +1069,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) break; case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT, &cm_id_priv->id.device->node_guid, sizeof(cm_id_priv->id.device->node_guid), @@ -1087,7 +1087,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) break; case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); goto retest; @@ -1105,7 +1105,7 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) cm_send_dreq_locked(cm_id_priv, NULL, 0); goto retest; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_enter_timewait(cm_id_priv); goto retest; case IB_CM_DREQ_RCVD: @@ -2531,7 +2531,7 @@ static int cm_rep_handler(struct cm_work *work) cm_ack_timeout(cm_id_priv->target_ack_delay, cm_id_priv->alt_av.timeout - 1); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; @@ -2555,7 +2555,7 @@ static int cm_establish_handler(struct cm_work *work) goto out; } - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -2588,7 +2588,7 @@ static int cm_rtu_handler(struct cm_work *work) } cm_id_priv->id.state = IB_CM_ESTABLISHED; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -2633,7 +2633,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); msg = cm_alloc_priv_msg(cm_id_priv); if (IS_ERR(msg)) { @@ -2807,12 +2807,12 @@ static int cm_dreq_handler(struct cm_work *work) switch (cm_id_priv->id.state) { case IB_CM_REP_SENT: case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_ESTABLISHED: if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); break; case IB_CM_MRA_REP_RCVD: break; @@ -2873,7 +2873,7 @@ static int cm_drep_handler(struct cm_work 
*work) } cm_enter_timewait(cm_id_priv); - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -3009,7 +3009,7 @@ static int cm_rej_handler(struct cm_work *work) case IB_CM_MRA_REQ_RCVD: case IB_CM_REP_SENT: case IB_CM_MRA_REP_RCVD: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); fallthrough; case IB_CM_REQ_RCVD: case IB_CM_MRA_REQ_SENT: @@ -3019,7 +3019,7 @@ static int cm_rej_handler(struct cm_work *work) cm_reset_to_idle(cm_id_priv); break; case IB_CM_DREQ_SENT: - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); fallthrough; case IB_CM_REP_RCVD: case IB_CM_MRA_REP_SENT: @@ -3029,8 +3029,7 @@ static int cm_rej_handler(struct cm_work *work) if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT || cm_id_priv->id.lap_state == IB_CM_LAP_SENT) { if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT) - ib_cancel_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_enter_timewait(cm_id_priv); break; } @@ -3169,16 +3168,14 @@ static int cm_mra_handler(struct cm_work *work) case IB_CM_REQ_SENT: if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != CM_MSG_RESPONSE_REQ || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + ib_modify_mad(cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD; break; case IB_CM_REP_SENT: if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != CM_MSG_RESPONSE_REP || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) + ib_modify_mad(cm_id_priv->msg, timeout)) goto out; cm_id_priv->id.state = IB_CM_MRA_REP_RCVD; break; @@ -3186,8 +3183,7 @@ static int cm_mra_handler(struct cm_work *work) if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) != CM_MSG_RESPONSE_OTHER || cm_id_priv->id.lap_state != IB_CM_LAP_SENT || - ib_modify_mad(cm_id_priv->av.port->mad_agent, - cm_id_priv->msg, timeout)) { + ib_modify_mad(cm_id_priv->msg, timeout)) { if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) atomic_long_inc(&work->port-> counter_group[CM_RECV_DUPLICATES]. 
@@ -3387,7 +3383,7 @@ static int cm_apr_handler(struct cm_work *work) goto out; } cm_id_priv->id.lap_state = IB_CM_LAP_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); cm_queue_work_unlock(cm_id_priv, work); return 0; out: @@ -3715,7 +3711,7 @@ static int cm_sidr_rep_handler(struct cm_work *work) goto out; } cm_id_priv->id.state = IB_CM_IDLE; - ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); + ib_cancel_mad(cm_id_priv->msg); spin_unlock_irq(&cm_id_priv->lock); cm_format_sidr_rep_event(work, cm_id_priv); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 2081e4854fb0..df6226f45047 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2459,16 +2459,18 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv, return NULL; } -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms) +int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms) { struct ib_mad_agent_private *mad_agent_priv; struct ib_mad_send_wr_private *mad_send_wr; unsigned long flags; int active; - mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, - agent); + if (!send_buf) + return -EINVAL; + + mad_agent_priv = container_of(send_buf->mad_agent, + struct ib_mad_agent_private, agent); spin_lock_irqsave(&mad_agent_priv->lock, flags); mad_send_wr = find_send_wr(mad_agent_priv, send_buf); if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) { @@ -2493,13 +2495,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent, } EXPORT_SYMBOL(ib_modify_mad); -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf) -{ - ib_modify_mad(mad_agent, send_buf, 0); -} -EXPORT_SYMBOL(ib_cancel_mad); - static void local_completions(struct work_struct *work) { struct ib_mad_agent_private *mad_agent_priv; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8f1705c403b4..9a4a49c37922 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1172,7 +1172,6 @@ EXPORT_SYMBOL(ib_sa_unregister_client); void ib_sa_cancel_query(int id, struct ib_sa_query *query) { unsigned long flags; - struct ib_mad_agent *agent; struct ib_mad_send_buf *mad_buf; xa_lock_irqsave(&queries, flags); @@ -1180,7 +1179,6 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) xa_unlock_irqrestore(&queries, flags); return; } - agent = query->port->agent; mad_buf = query->mad_buf; xa_unlock_irqrestore(&queries, flags); @@ -1190,7 +1188,7 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) * sent to the MAD layer and has to be cancelled from there. */ if (!ib_nl_cancel_request(query)) - ib_cancel_mad(agent, mad_buf); + ib_cancel_mad(mad_buf); } EXPORT_SYMBOL(ib_sa_cancel_query); diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index f1d34f06a68b..465b0d0bdaf8 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -717,28 +717,27 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, */ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); -/** - * ib_cancel_mad - Cancels an outstanding send MAD operation. - * @mad_agent: Specifies the registration associated with sent MAD. - * @send_buf: Indicates the MAD to cancel. - * - * MADs will be returned to the user through the corresponding - * ib_mad_send_handler. 
- */ -void ib_cancel_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf); - /** * ib_modify_mad - Modifies an outstanding send MAD operation. - * @mad_agent: Specifies the registration associated with sent MAD. * @send_buf: Indicates the MAD to modify. * @timeout_ms: New timeout value for sent MAD. * * This call will reset the timeout value for a sent MAD to the specified * value. */ -int ib_modify_mad(struct ib_mad_agent *mad_agent, - struct ib_mad_send_buf *send_buf, u32 timeout_ms); +int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms); + +/** + * ib_cancel_mad - Cancels an outstanding send MAD operation. + * @send_buf: Indicates the MAD to cancel. + * + * MADs will be returned to the user through the corresponding + * ib_mad_send_handler. + */ +static inline void ib_cancel_mad(struct ib_mad_send_buf *send_buf) +{ + ib_modify_mad(send_buf, 0); +} /** * ib_create_send_mad - Allocate and initialize a data buffer and work request From patchwork Tue May 11 08:22:11 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12250183 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.7 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id ED13AC43462 for ; Tue, 11 May 2021 08:31:43 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id B953161184 for ; Tue, 11 May 2021 08:31:43 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231218AbhEKIcr (ORCPT ); Tue, 11 May 2021 04:32:47 -0400 Received: from mail.kernel.org ([198.145.29.99]:42646 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230514AbhEKIcj (ORCPT ); Tue, 11 May 2021 04:32:39 -0400 Received: by mail.kernel.org (Postfix) with ESMTPSA id CB3A661937; Tue, 11 May 2021 08:23:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1620721402; bh=TJJ0HlA7YmggpHRXsCnvz+1yfDn4QnGmIyklAHsRH28=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=SR4kuCBtUtJd1wGRlKO3KgjlN3SHxPylPXqd/67fISvkDICZbOEbaFBIXZOBmug6N al8OZwYz7E0YrapmerDmu1CYVvXA+euN36U0CSLu6aq9o1+ThOt4/YvAF1Me+TbZWG N/W4FLi5EOnyym2fljxpZRljnI8Hwl3k4ltt3xD7vGjzgjTrg3fveesZJQ/LdD9ytO oZtX6oTBcj8AwbWs28MRZqSdcoWHZR6ps3N9vxTfAVfP9Kk42igyTqXOuALoZJzEBx FqTK9Jddg45mw+/ugIxL41rFlrWyE+Q0HQg4z2Chwn9xic4O4ZvP5K98Jl5hsyR0Bq bPjaHrSxGSR/A== From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: Mark Zhang , linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, Sean Hefty Subject: [PATCH rdma-next v3 7/8] IB/cm: Improve the calling of cm_init_av_for_lap and cm_init_av_by_path Date: Tue, 11 May 2021 11:22:11 +0300 Message-Id: <7adea3d5550d59b25feaea3a3e175c0918df2997.1620720467.git.leonro@nvidia.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Mark Zhang The cm_init_av_for_lap() and cm_init_av_by_path() function calls have the following issues: 1. 
Both of them might sleep and should not be called under spinlock. 2. The access of cm_id_priv->av should be under cm_id_priv->lock, which means it cann't be initialized directly. This patch splits the calling of 2 functions into two parts: first one initializes an AV outside of the spinlock, the second one copies AV to cm_id_priv->av under spinlock. Fixes: e1444b5a163e ("IB/cm: Fix automatic path migration support") Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 105 +++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2d62c90f9790..2e4658795e8e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -440,30 +440,12 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv, cm_id_priv->private_data_len = private_data_len; } -static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, - struct ib_grh *grh, struct cm_av *av) +static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, + struct rdma_ah_attr *ah_attr, struct cm_av *av) { - struct rdma_ah_attr new_ah_attr; - int ret; - av->port = port; av->pkey_index = wc->pkey_index; - - /* - * av->ah_attr might be initialized based on past wc during incoming - * connect request or while sending out connect request. So initialize - * a new ah_attr on stack. If initialization fails, old ah_attr is - * used for sending any responses. If initialization is successful, - * than new ah_attr is used by overwriting old one. - */ - ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device, - port->port_num, wc, - grh, &new_ah_attr); - if (ret) - return ret; - - rdma_move_ah_attr(&av->ah_attr, &new_ah_attr); - return 0; + rdma_move_ah_attr(&av->ah_attr, ah_attr); } static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, @@ -557,6 +539,20 @@ static int cm_init_av_by_path(struct sa_path_rec *path, return 0; } +/* Move av created by cm_init_av_by_path(), so av.dgid is not moved */ +static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src) +{ + dest->port = src->port; + dest->pkey_index = src->pkey_index; + rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr); + dest->timeout = src->timeout; +} + +static void cm_destroy_av(struct cm_av *av) +{ + rdma_destroy_ah_attr(&av->ah_attr); +} + static u32 cm_local_id(__be32 local_id) { return (__force u32) (local_id ^ cm.random_id_operand); @@ -1146,8 +1142,8 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err) while ((work = cm_dequeue_work(cm_id_priv)) != NULL) cm_free_work(work); - rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr); - rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr); + cm_destroy_av(&cm_id_priv->av); + cm_destroy_av(&cm_id_priv->alt_av); kfree(cm_id_priv->private_data); kfree_rcu(cm_id_priv, rcu); } @@ -1471,6 +1467,7 @@ static int cm_validate_req_param(struct ib_cm_req_param *param) int ib_send_cm_req(struct ib_cm_id *cm_id, struct ib_cm_req_param *param) { + struct cm_av av = {}, alt_av = {}; struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; struct cm_req_msg *req_msg; @@ -1486,8 +1483,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = -EINVAL; - goto out; + return -EINVAL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -1496,18 +1492,20 @@ int ib_send_cm_req(struct 
ib_cm_id *cm_id, if (IS_ERR(cm_id_priv->timewait_info)) { ret = PTR_ERR(cm_id_priv->timewait_info); cm_id_priv->timewait_info = NULL; - goto out; + return ret; } ret = cm_init_av_by_path(param->primary_path, - param->ppath_sgid_attr, &cm_id_priv->av); + param->ppath_sgid_attr, &av); if (ret) - goto out; + return ret; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, NULL, - &cm_id_priv->alt_av); - if (ret) - goto out; + &alt_av); + if (ret) { + cm_destroy_av(&av); + return ret; + } } cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); @@ -1524,6 +1522,11 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->qp_type = param->qp_type; spin_lock_irqsave(&cm_id_priv->lock, flags); + + cm_move_av_from_path(&cm_id_priv->av, &av); + if (param->alternate_path) + cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); + msg = cm_alloc_priv_msg(cm_id_priv); if (IS_ERR(msg)) { ret = PTR_ERR(msg); @@ -1551,7 +1554,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_free_priv_msg(msg); out_unlock: spin_unlock_irqrestore(&cm_id_priv->lock, flags); -out: return ret; } EXPORT_SYMBOL(ib_send_cm_req); @@ -3264,6 +3266,8 @@ static int cm_lap_handler(struct cm_work *work) struct cm_lap_msg *lap_msg; struct ib_cm_lap_event_param *param; struct ib_mad_send_buf *msg = NULL; + struct rdma_ah_attr ah_attr; + struct cm_av alt_av = {}; int ret; /* Currently Alternate path messages are not supported for @@ -3292,7 +3296,25 @@ static int cm_lap_handler(struct cm_work *work) work->cm_event.private_data = IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg); + ret = ib_init_ah_attr_from_wc(work->port->cm_dev->ib_device, + work->port->port_num, + work->mad_recv_wc->wc, + work->mad_recv_wc->recv_buf.grh, + &ah_attr); + if (ret) + goto deref; + + ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av); + if (ret) { + rdma_destroy_ah_attr(&ah_attr); + return -EINVAL; + } + spin_lock_irq(&cm_id_priv->lock); + cm_init_av_for_lap(work->port, work->mad_recv_wc->wc, + &ah_attr, &cm_id_priv->av); + cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); + if (cm_id_priv->id.state != IB_CM_ESTABLISHED) goto unlock; @@ -3326,17 +3348,6 @@ static int cm_lap_handler(struct cm_work *work) goto unlock; } - ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc, - work->mad_recv_wc->recv_buf.grh, - &cm_id_priv->av); - if (ret) - goto unlock; - - ret = cm_init_av_by_path(param->alternate_path, NULL, - &cm_id_priv->alt_av); - if (ret) - goto unlock; - cm_id_priv->id.lap_state = IB_CM_LAP_RCVD; cm_id_priv->tid = lap_msg->hdr.tid; cm_queue_work_unlock(cm_id_priv, work); @@ -3443,6 +3454,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, { struct cm_id_private *cm_id_priv; struct ib_mad_send_buf *msg; + struct cm_av av = {}; unsigned long flags; int ret; @@ -3451,17 +3463,16 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, param->sgid_attr, - &cm_id_priv->av); + ret = cm_init_av_by_path(param->path, param->sgid_attr, &av); if (ret) return ret; + spin_lock_irqsave(&cm_id_priv->lock, flags); + cm_move_av_from_path(&cm_id_priv->av, &av); cm_id->service_id = param->service_id; cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; - - spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_IDLE) { ret = -EINVAL; goto out_unlock; From patchwork Tue May 11 08:22:12 2021 Content-Type: 
text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leon Romanovsky X-Patchwork-Id: 12250169 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-19.7 required=3.0 tests=BAYES_00,DKIMWL_WL_HIGH, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,URIBL_BLOCKED,USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id DC361C43600 for ; Tue, 11 May 2021 08:31:36 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id AF91361184 for ; Tue, 11 May 2021 08:31:36 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230511AbhEKIcj (ORCPT ); Tue, 11 May 2021 04:32:39 -0400 Received: from mail.kernel.org ([198.145.29.99]:42532 "EHLO mail.kernel.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230473AbhEKIci (ORCPT ); Tue, 11 May 2021 04:32:38 -0400 Received: by mail.kernel.org (Postfix) with ESMTPSA id 41C706193D; Tue, 11 May 2021 08:23:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1620721395; bh=KRSlpw7GAj/rToMbf95w4v5XrJqIGcof7/1rcJHrlf4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=VuVwa5QjzwQnG6atHOCTyaqkjV50X8rEfq2lHabDiyanJS+MSSjDbYOpeqeD2ljW+ KgRTPo7xc0LrqTG6MpqU5pjFeGcQZZPxzI76ETa4ZyTezoa/7zzoCuOaZuFQUt07D3 Nl8qj/OhhpOgLMB/L9QsOVegvN/+wz4IS8cgp9zTP2OyoaoNZGqJGC505/SgklCfPX v4bdVhL5BTj8PWevAosqcOEbmq+MrZeufjd4Wv6JB5jOheCVil2cnRFAKZ67/SXHai rYwd/+HdJYjc19H43nOahuN6jSgUewXG/YzK/woQWviXXBaEWxV766FNM+s9TrYmFe VGUNnW9KDFjqQ== From: Leon Romanovsky To: Doug Ledford , Jason Gunthorpe Cc: Mark Zhang , linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org, Sean Hefty Subject: [PATCH rdma-next v3 8/8] IB/cm: Protect cm_dev, cm_ports and mad_agent with kref and lock Date: Tue, 11 May 2021 11:22:12 +0300 Message-Id: <7ca9e316890a3755abadefdd7fe3fc1dc4a1e79f.1620720467.git.leonro@nvidia.com> X-Mailer: git-send-email 2.31.1 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Mark Zhang During cm_dev deregistration in cm_remove_one(), the cm_device and cm_ports will be freed, after that they should not be accessed. The mad_agent needs to be protected as well. This patch adds a cm_device kref to protect cm_dev and cm_ports, and a mad_agent_lock spinlock to protect mad_agent. 
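For illustration, here is a minimal, self-contained sketch of the sender-side part of this scheme (the demo_* names are simplified, hypothetical stand-ins, not the actual cm.c structures): the kref keeps the device structure alive for as long as anything still references one of its ports, and mad_agent may only be dereferenced under mad_agent_lock, where a NULL value means deregistration has already begun.

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <rdma/ib_mad.h>

/* Hypothetical, simplified stand-in for struct cm_device. */
struct demo_dev {
	struct kref kref;		/* keeps this structure alive while AVs point at it */
	spinlock_t mad_agent_lock;	/* protects mad_agent below */
	struct ib_mad_agent *mad_agent;	/* NULL once deregistration has started */
};

static void demo_dev_release(struct kref *kref)
{
	kfree(container_of(kref, struct demo_dev, kref));
}

/* An AV that starts or stops referencing the device pins it with the kref. */
static void demo_bind(struct demo_dev *dev)
{
	kref_get(&dev->kref);
}

static void demo_unbind(struct demo_dev *dev)
{
	kref_put(&dev->kref, demo_dev_release);
}

/* Sender path: build and post the MAD while holding the lock, and handle NULL. */
static int demo_send(struct demo_dev *dev)
{
	int ret = -EINVAL;

	spin_lock(&dev->mad_agent_lock);
	if (dev->mad_agent) {
		/* ... create the AH and send buffer via dev->mad_agent here ... */
		ret = 0;
	}
	spin_unlock(&dev->mad_agent_lock);
	return ret;
}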
Signed-off-by: Jason Gunthorpe Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 112 +++++++++++++++++++++++++++++------ 1 file changed, 93 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2e4658795e8e..292f1639e8b0 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -205,7 +205,9 @@ struct cm_port { }; struct cm_device { + struct kref kref; struct list_head list; + spinlock_t mad_agent_lock; struct ib_device *ib_device; u8 ack_delay; int going_down; @@ -287,6 +289,22 @@ struct cm_id_private { struct rdma_ucm_ece ece; }; +static void cm_dev_release(struct kref *kref) +{ + struct cm_device *cm_dev = container_of(kref, struct cm_device, kref); + u32 i; + + rdma_for_each_port(cm_dev->ib_device, i) + kfree(cm_dev->port[i - 1]); + + kfree(cm_dev); +} + +static void cm_device_put(struct cm_device *cm_dev) +{ + kref_put(&cm_dev->kref, cm_dev_release); +} + static void cm_work_handler(struct work_struct *work); static inline void cm_deref_id(struct cm_id_private *cm_id_priv) @@ -301,10 +319,23 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) struct ib_mad_send_buf *m; struct ib_ah *ah; + lockdep_assert_held(&cm_id_priv->lock); + + if (!cm_id_priv->av.port) + return ERR_PTR(-EINVAL); + + spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); mad_agent = cm_id_priv->av.port->mad_agent; + if (!mad_agent) { + m = ERR_PTR(-EINVAL); + goto out; + } + ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0); - if (IS_ERR(ah)) - return (void *)ah; + if (IS_ERR(ah)) { + m = ERR_CAST(ah); + goto out; + } m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, cm_id_priv->av.pkey_index, @@ -313,7 +344,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { rdma_destroy_ah(ah, 0); - return m; + goto out; } /* Timeout set by caller if response is expected. 
*/ @@ -322,6 +353,9 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv) refcount_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; + +out: + spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return m; } @@ -440,10 +474,24 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv, cm_id_priv->private_data_len = private_data_len; } +static void cm_set_av_port(struct cm_av *av, struct cm_port *port) +{ + struct cm_port *old_port = av->port; + + if (old_port == port) + return; + + av->port = port; + if (old_port) + cm_device_put(old_port->cm_dev); + if (port) + kref_get(&port->cm_dev->kref); +} + static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, struct rdma_ah_attr *ah_attr, struct cm_av *av) { - av->port = port; + cm_set_av_port(av, port); av->pkey_index = wc->pkey_index; rdma_move_ah_attr(&av->ah_attr, ah_attr); } @@ -451,7 +499,7 @@ static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc, static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, struct ib_grh *grh, struct cm_av *av) { - av->port = port; + cm_set_av_port(av, port); av->pkey_index = wc->pkey_index; return ib_init_ah_attr_from_wc(port->cm_dev->ib_device, port->port_num, wc, @@ -518,7 +566,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, if (ret) return ret; - av->port = port; + cm_set_av_port(av, port); /* * av->ah_attr might be initialized based on wc or during @@ -542,7 +590,8 @@ static int cm_init_av_by_path(struct sa_path_rec *path, /* Move av created by cm_init_av_by_path(), so av.dgid is not moved */ static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src) { - dest->port = src->port; + cm_set_av_port(dest, src->port); + cm_set_av_port(src, NULL); dest->pkey_index = src->pkey_index; rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr); dest->timeout = src->timeout; @@ -551,6 +600,7 @@ static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src) static void cm_destroy_av(struct cm_av *av) { rdma_destroy_ah_attr(&av->ah_attr); + cm_set_av_port(av, NULL); } static u32 cm_local_id(__be32 local_id) @@ -1275,10 +1325,18 @@ EXPORT_SYMBOL(ib_cm_insert_listen); static __be64 cm_form_tid(struct cm_id_private *cm_id_priv) { - u64 hi_tid, low_tid; + u64 hi_tid = 0, low_tid; + + lockdep_assert_held(&cm_id_priv->lock); + + low_tid = (u64)cm_id_priv->id.local_id; + if (!cm_id_priv->av.port) + return cpu_to_be64(low_tid); - hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32; - low_tid = (u64)cm_id_priv->id.local_id; + spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); + if (cm_id_priv->av.port->mad_agent) + hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32; + spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock); return cpu_to_be64(hi_tid | low_tid); } @@ -2139,6 +2197,8 @@ static int cm_req_handler(struct cm_work *work) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); work->path[0].hop_limit = grh->hop_limit; + + cm_destroy_av(&cm_id_priv->av); ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av); if (ret) { int err; @@ -4090,7 +4150,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC; qp_attr->pkey_index = cm_id_priv->av.pkey_index; - qp_attr->port_num = cm_id_priv->av.port->port_num; + if (cm_id_priv->av.port) + qp_attr->port_num = cm_id_priv->av.port->port_num; ret = 0; break; default: @@ -4132,7 +4193,8 @@ static int 
cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, cm_id_priv->responder_resources; qp_attr->min_rnr_timer = 0; } - if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) { + if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr) && + cm_id_priv->alt_av.port) { *qp_attr_mask |= IB_QP_ALT_PATH; qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; @@ -4193,7 +4255,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, } } else { *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE; - qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num; + if (cm_id_priv->alt_av.port) + qp_attr->alt_port_num = + cm_id_priv->alt_av.port->port_num; qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index; qp_attr->alt_timeout = cm_id_priv->alt_av.timeout; qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr; @@ -4311,6 +4375,8 @@ static int cm_add_one(struct ib_device *ib_device) if (!cm_dev) return -ENOMEM; + kref_init(&cm_dev->kref); + spin_lock_init(&cm_dev->mad_agent_lock); cm_dev->ib_device = ib_device; cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; cm_dev->going_down = 0; @@ -4373,7 +4439,6 @@ static int cm_add_one(struct ib_device *ib_device) error1: port_modify.set_port_cap_mask = 0; port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; - kfree(port); while (--i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4382,10 +4447,9 @@ static int cm_add_one(struct ib_device *ib_device) ib_modify_port(ib_device, port->port_num, 0, &port_modify); ib_unregister_mad_agent(port->mad_agent); cm_remove_port_fs(port); - kfree(port); } free: - kfree(cm_dev); + cm_device_put(cm_dev); return ret; } @@ -4408,10 +4472,13 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) spin_unlock_irq(&cm.lock); rdma_for_each_port (ib_device, i) { + struct ib_mad_agent *mad_agent; + if (!rdma_cap_ib_cm(ib_device, i)) continue; port = cm_dev->port[i-1]; + mad_agent = port->mad_agent; ib_modify_port(ib_device, port->port_num, 0, &port_modify); /* * We flush the queue here after the going_down set, this @@ -4419,12 +4486,19 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) * after that we can call the unregister_mad_agent */ flush_workqueue(cm.wq); - ib_unregister_mad_agent(port->mad_agent); + /* + * The above ensures no call paths from the work are running, + * the remaining paths all take the unregistration lock + */ + spin_lock(&cm_dev->mad_agent_lock); + port->mad_agent = NULL; + spin_unlock(&cm_dev->mad_agent_lock); + ib_unregister_mad_agent(mad_agent); cm_remove_port_fs(port); - kfree(port); } - kfree(cm_dev); + /* All touches can only be on call path from the work */ + cm_device_put(cm_dev); } static int __init ib_cm_init(void)
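For reference, a condensed sketch of the teardown ordering that the cm_remove_one() hunk above relies on, continuing the simplified demo_* example from the commit message (illustrative only, not the exact driver code; it additionally assumes <linux/workqueue.h>):

/* Teardown: flush work first, hide the agent under the lock, then unregister. */
static void demo_remove(struct demo_dev *dev, struct workqueue_struct *wq)
{
	struct ib_mad_agent *agent;

	/* 1. No new work is queued past this point; wait for in-flight handlers. */
	flush_workqueue(wq);

	/* 2. Hide the agent so any late sender sees NULL under mad_agent_lock. */
	spin_lock(&dev->mad_agent_lock);
	agent = dev->mad_agent;
	dev->mad_agent = NULL;
	spin_unlock(&dev->mad_agent_lock);

	/* 3. Safe to unregister now: nothing can pick the agent up anymore. */
	ib_unregister_mad_agent(agent);

	/* 4. Drop the device reference; the memory is freed only when the last
	 *    AV that still points at this device releases its kref as well.
	 */
	kref_put(&dev->kref, demo_dev_release);
}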