From patchwork Tue May 19 18:12:06 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Wan, Kaike" X-Patchwork-Id: 6440241 Return-Path: X-Original-To: patchwork-linux-rdma@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork1.web.kernel.org (Postfix) with ESMTP id C3E999F1CC for ; Tue, 19 May 2015 18:12:52 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 72B84201F2 for ; Tue, 19 May 2015 18:12:51 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id BE7CC203AB for ; Tue, 19 May 2015 18:12:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751947AbbESSMm (ORCPT ); Tue, 19 May 2015 14:12:42 -0400 Received: from mga01.intel.com ([192.55.52.88]:27340 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751807AbbESSMk (ORCPT ); Tue, 19 May 2015 14:12:40 -0400 Received: from fmsmga003.fm.intel.com ([10.253.24.29]) by fmsmga101.fm.intel.com with ESMTP; 19 May 2015 11:12:37 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.13,459,1427785200"; d="scan'208";a="495652964" Received: from phlsvsds.ph.intel.com ([10.228.195.38]) by FMSMGA003.fm.intel.com with ESMTP; 19 May 2015 11:12:36 -0700 Received: from phlsvsds.ph.intel.com (localhost.localdomain [127.0.0.1]) by phlsvsds.ph.intel.com (8.13.8/8.13.8) with ESMTP id t4JICZeO005883; Tue, 19 May 2015 14:12:35 -0400 Received: (from kaikewan@localhost) by phlsvsds.ph.intel.com (8.13.8/8.13.8/Submit) id t4JICZMG005873; Tue, 19 May 2015 14:12:35 -0400 X-Authentication-Warning: phlsvsds.ph.intel.com: kaikewan set sender to kaike.wan@intel.com using -f From: kaike.wan@intel.com To: linux-rdma@vger.kernel.org Cc: Kaike Wan , John Fleck , Ira Weiny Subject: [PATCH v2 
4/4] IB/sa: Route SA pathrecord query through netlink Date: Tue, 19 May 2015 14:12:06 -0400 Message-Id: <1432059126-4137-5-git-send-email-kaike.wan@intel.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1432059126-4137-1-git-send-email-kaike.wan@intel.com> References: <1432059126-4137-1-git-send-email-kaike.wan@intel.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Spam-Status: No, score=-6.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, T_RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Kaike Wan This patch routes an SA pathrecord query to netlink first and processes the response appropriately. If a failure is returned, or no response arrives before the local service timeout expires, the request will be sent through IB. Whether the request is routed to netlink first is determined by the presence of a listener for the MAD netlink multicast group. If the user-space MAD netlink multicast group listener is not present, the request will be sent through IB, exactly as is done today. 
Signed-off-by: Kaike Wan Signed-off-by: John Fleck Signed-off-by: Ira Weiny Reviewed-by: Sean Hefty --- drivers/infiniband/core/sa_query.c | 325 +++++++++++++++++++++++++++++++++++- 1 files changed, 324 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 17e1cf7..0e362d0 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -45,12 +45,21 @@ #include #include #include +#include +#include #include "sa.h" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand subnet administration query support"); MODULE_LICENSE("Dual BSD/GPL"); +#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 +#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 +static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT; + +module_param_named(local_svc_timeout_ms, sa_local_svc_timeout_ms, int, 0644); +MODULE_PARM_DESC(local_svc_timeout_ms, "Local service timeout in millisecond"); + struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; @@ -80,8 +89,24 @@ struct ib_sa_query { struct ib_mad_send_buf *mad_buf; struct ib_sa_sm_ah *sm_ah; int id; + u32 flags; }; +#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 +#define IB_SA_CANCEL 0x00000002 + +#define IB_SA_LOCAL_SVC_ENABLED(query) \ + ((query)->flags & IB_SA_ENABLE_LOCAL_SERVICE) +#define IB_SA_ENABLE_LOCAL_SVC(query) \ + ((query)->flags |= IB_SA_ENABLE_LOCAL_SERVICE) +#define IB_SA_DISABLE_LOCAL_SVC(query) \ + ((query)->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE) + +#define IB_SA_QUERY_CANCELLED(query) \ + ((query)->flags & IB_SA_CANCEL) +#define IB_SA_CANCEL_QUERY(query) \ + ((query)->flags |= IB_SA_CANCEL) + struct ib_sa_service_query { void (*callback)(int, struct ib_sa_service_rec *, void *); void *context; @@ -106,6 +131,24 @@ struct ib_sa_mcmember_query { struct ib_sa_query sa_query; }; +struct ib_nl_request_info { + struct list_head list; + u32 seq; + unsigned long timeout; + struct ib_sa_query *query; +}; + +struct ib_nl_resp_msg { + struct 
nlmsghdr nl_hdr; + struct ib_sa_mad sa_mad; +}; + +static LIST_HEAD(ib_nl_request_list); +static DEFINE_SPINLOCK(ib_nl_request_lock); +static atomic_t ib_nl_sa_request_seq; +static struct workqueue_struct *ib_nl_wq; +static struct delayed_work ib_nl_timed_work; + static void ib_sa_add_one(struct ib_device *device); static void ib_sa_remove_one(struct ib_device *device); @@ -381,6 +424,244 @@ static const struct ib_field guidinfo_rec_table[] = { .size_bits = 512 }, }; +static int ib_nl_send_mad(void *mad, int len, u32 seq) +{ + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + void *data; + int ret = 0; + + skb = nlmsg_new(len, GFP_KERNEL); + if (!skb) { + pr_err("alloc failed ret=%d\n", ret); + return -ENOMEM; + } + + data = ibnl_put_msg(skb, &nlh, seq, len, RDMA_NL_MAD, + RDMA_NL_MAD_REQUEST, GFP_KERNEL); + if (!data) { + kfree_skb(skb); + return -EMSGSIZE; + } + memcpy(data, mad, len); + + ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_MAD, GFP_KERNEL); + if (!ret) { + ret = len; + } else { + if (ret != -ESRCH) + pr_err("ibnl_multicast failed l=%d, r=%d\n", len, ret); + ret = 0; + } + return ret; +} + +static struct ib_nl_request_info * +ib_nl_alloc_request(struct ib_sa_query *query) +{ + struct ib_nl_request_info *rinfo; + + rinfo = kzalloc(sizeof(*rinfo), GFP_ATOMIC); + if (rinfo == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&rinfo->list); + rinfo->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); + rinfo->query = query; + + return rinfo; +} + +static int ib_nl_send_request(struct ib_nl_request_info *rinfo) +{ + struct ib_mad_send_buf *send_buf; + unsigned long flags; + unsigned long delay; + int ret; + + send_buf = rinfo->query->mad_buf; + + delay = msecs_to_jiffies(sa_local_svc_timeout_ms); + spin_lock_irqsave(&ib_nl_request_lock, flags); + ret = ib_nl_send_mad(send_buf->mad, + (send_buf->data_len + send_buf->hdr_len), + rinfo->seq); + + if (ret != (send_buf->data_len + send_buf->hdr_len)) { + kfree(rinfo); + ret = -EIO; + goto request_out; + } 
else { + ret = 0; + } + + rinfo->timeout = delay + jiffies; + list_add_tail(&rinfo->list, &ib_nl_request_list); + /* Start the timeout if this is the only request */ + if (ib_nl_request_list.next == &rinfo->list) + queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); + +request_out: + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + + return ret; +} + +static int ib_nl_make_request(struct ib_sa_query *query) +{ + struct ib_nl_request_info *rinfo; + + rinfo = ib_nl_alloc_request(query); + if (IS_ERR(rinfo)) + return -ENOMEM; + + return ib_nl_send_request(rinfo); +} + +static int ib_nl_cancel_request(struct ib_sa_query *query) +{ + unsigned long flags; + struct ib_nl_request_info *rinfo; + int found = 0; + + spin_lock_irqsave(&ib_nl_request_lock, flags); + list_for_each_entry(rinfo, &ib_nl_request_list, list) { + /* Let the timeout to take care of the callback */ + if (query == rinfo->query) { + IB_SA_CANCEL_QUERY(query); + rinfo->timeout = jiffies; + list_move(&rinfo->list, &ib_nl_request_list); + found = 1; + mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1); + break; + } + } + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + + return found; +} + + +static int ib_nl_handle_mad_resp(struct sk_buff *skb, + struct netlink_callback *cb); +static struct ibnl_client_cbs ib_sa_cb_table[] = { + [RDMA_NL_MAD_REQUEST] = { + .dump = ib_nl_handle_mad_resp, + .module = THIS_MODULE }, +}; + +static void send_handler(struct ib_mad_agent *agent, + struct ib_mad_send_wc *mad_send_wc); + +static void ib_nl_process_good_rsp(struct ib_sa_query *query, + struct ib_sa_mad *rsp) +{ + struct ib_mad_send_wc mad_send_wc; + + if (query->callback) + query->callback(query, 0, rsp); + + mad_send_wc.send_buf = query->mad_buf; + mad_send_wc.status = IB_WC_SUCCESS; + send_handler(query->mad_buf->mad_agent, &mad_send_wc); +} + +static void ib_nl_request_timeout(struct work_struct *work) +{ + unsigned long flags; + struct ib_nl_request_info *rinfo; + struct ib_sa_query *query; + unsigned 
long delay; + struct ib_mad_send_wc mad_send_wc; + int ret; + + spin_lock_irqsave(&ib_nl_request_lock, flags); + while (!list_empty(&ib_nl_request_list)) { + rinfo = list_entry(ib_nl_request_list.next, + struct ib_nl_request_info, list); + + if (time_after(rinfo->timeout, jiffies)) { + delay = rinfo->timeout - jiffies; + if ((long)delay <= 0) + delay = 1; + queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); + break; + } + + list_del(&rinfo->list); + query = rinfo->query; + IB_SA_DISABLE_LOCAL_SVC(query); + /* Hold the lock to protect against query cancellation */ + if (IB_SA_QUERY_CANCELLED(query)) + ret = -1; + else + ret = ib_post_send_mad(query->mad_buf, NULL); + if (ret) { + mad_send_wc.send_buf = query->mad_buf; + mad_send_wc.status = IB_WC_WR_FLUSH_ERR; + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + send_handler(query->port->agent, &mad_send_wc); + spin_lock_irqsave(&ib_nl_request_lock, flags); + } + kfree(rinfo); + } + spin_unlock_irqrestore(&ib_nl_request_lock, flags); +} + +static int ib_nl_handle_mad_resp(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct ib_nl_resp_msg *nl_msg = (struct ib_nl_resp_msg *)cb->nlh; + unsigned long flags; + struct ib_nl_request_info *rinfo; + struct ib_sa_query *query; + struct ib_mad_send_buf *send_buf; + struct ib_mad_send_wc mad_send_wc; + int found = 0; + int ret; + + spin_lock_irqsave(&ib_nl_request_lock, flags); + list_for_each_entry(rinfo, &ib_nl_request_list, list) { + /* + * If the query is cancelled, let the timeout routine + * take care of it. 
+ */ + if (nl_msg->nl_hdr.nlmsg_seq == rinfo->seq) { + found = !IB_SA_QUERY_CANCELLED(rinfo->query); + if (found) + list_del(&rinfo->list); + break; + } + } + + if (!found) { + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + goto resp_out; + } + + query = rinfo->query; + send_buf = query->mad_buf; + + if (nl_msg->sa_mad.mad_hdr.status != 0) { + /* if the result is a failure, send out the packet via IB */ + IB_SA_DISABLE_LOCAL_SVC(query); + ret = ib_post_send_mad(query->mad_buf, NULL); + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + if (ret) { + mad_send_wc.send_buf = send_buf; + mad_send_wc.status = IB_WC_GENERAL_ERR; + send_handler(query->port->agent, &mad_send_wc); + } + } else { + spin_unlock_irqrestore(&ib_nl_request_lock, flags); + ib_nl_process_good_rsp(query, &nl_msg->sa_mad); + } + + kfree(rinfo); +resp_out: + return skb->len; +} + static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -502,7 +783,13 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query) mad_buf = query->mad_buf; spin_unlock_irqrestore(&idr_lock, flags); - ib_cancel_mad(agent, mad_buf); + /* + * If the query is still on the netlink request list, schedule + * it to be cancelled by the timeout routine. Otherwise, it has been + * sent to the MAD layer and has to be cancelled from there. 
+ */ + if (!ib_nl_cancel_request(query)) + ib_cancel_mad(agent, mad_buf); } EXPORT_SYMBOL(ib_sa_cancel_query); @@ -638,6 +925,14 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) query->mad_buf->context[0] = query; query->id = id; + if (IB_SA_LOCAL_SVC_ENABLED(query)) { + if (!ibnl_chk_listeners(RDMA_NL_GROUP_MAD)) { + if (!ib_nl_make_request(query)) + return id; + } + IB_SA_DISABLE_LOCAL_SVC(query); + } + ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { spin_lock_irqsave(&idr_lock, flags); @@ -766,6 +1061,8 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, *sa_query = &query->sa_query; + IB_SA_ENABLE_LOCAL_SVC(&query->sa_query); + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; @@ -1250,6 +1547,10 @@ static int __init ib_sa_init(void) get_random_bytes(&tid, sizeof tid); + atomic_set(&ib_nl_sa_request_seq, 0); + sa_local_svc_timeout_ms = max(sa_local_svc_timeout_ms, + IB_SA_LOCAL_SVC_TIMEOUT_MIN); + ret = ib_register_client(&sa_client); if (ret) { printk(KERN_ERR "Couldn't register ib_sa client\n"); @@ -1262,7 +1563,25 @@ static int __init ib_sa_init(void) goto err2; } + ib_nl_wq = create_singlethread_workqueue("ib_nl_sa_wq"); + if (!ib_nl_wq) { + ret = -ENOMEM; + goto err3; + } + + if (ibnl_add_client(RDMA_NL_MAD, RDMA_NL_MAD_NUM_OPS, + ib_sa_cb_table)) { + pr_err("Failed to add netlink callback\n"); + ret = -EINVAL; + goto err4; + } + INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout); + return 0; +err4: + destroy_workqueue(ib_nl_wq); +err3: + mcast_cleanup(); err2: ib_unregister_client(&sa_client); err1: @@ -1271,6 +1590,10 @@ err1: static void __exit ib_sa_cleanup(void) { + ibnl_remove_client(RDMA_NL_MAD); + cancel_delayed_work(&ib_nl_timed_work); + flush_workqueue(ib_nl_wq); + destroy_workqueue(ib_nl_wq); mcast_cleanup(); ib_unregister_client(&sa_client); idr_destroy(&query_idr);