From patchwork Wed Mar 11 04:56:06 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Somnath Kotur X-Patchwork-Id: 5977411 Return-Path: X-Original-To: patchwork-linux-rdma@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork1.web.kernel.org (Postfix) with ESMTP id 19CD69F318 for ; Tue, 10 Mar 2015 12:31:27 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 4FC6320218 for ; Tue, 10 Mar 2015 12:31:23 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 784E72024F for ; Tue, 10 Mar 2015 12:31:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752233AbbCJMaz (ORCPT ); Tue, 10 Mar 2015 08:30:55 -0400 Received: from cmexedge1.emulex.com ([138.239.224.99]:30552 "EHLO CMEXEDGE1.ext.emulex.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751955AbbCJMay (ORCPT ); Tue, 10 Mar 2015 08:30:54 -0400 Received: from CMEXHTCAS2.ad.emulex.com (138.239.115.218) by CMEXEDGE1.ext.emulex.com (138.239.224.99) with Microsoft SMTP Server (TLS) id 14.3.210.2; Tue, 10 Mar 2015 05:30:55 -0700 Received: from codebrowse.emulex.com (10.192.207.129) by smtp.emulex.com (138.239.115.208) with Microsoft SMTP Server id 14.3.210.2; Tue, 10 Mar 2015 05:30:50 -0700 From: Somnath Kotur To: CC: , Somnath Kotur , Matan Barak Subject: [PATCH v2 for-next 14/32] IB/Core: Changes to the IB Core infrastructure for RoCEv2 support Date: Wed, 11 Mar 2015 10:26:06 +0530 X-Mailer: git-send-email 1.7.9.5 In-Reply-To: <1426049785-30364-1-git-send-email-somnath.kotur@emulex.com> References: <1426049785-30364-1-git-send-email-somnath.kotur@emulex.com> MIME-Version: 1.0 Message-ID: <8042ab75-2828-4fee-974f-d192ba5279a3@CMEXHTCAS2.ad.emulex.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Spam-Status: No, score=-3.7 required=5.0 tests=BAYES_00, DATE_IN_FUTURE_12_24, RCVD_IN_DNSWL_HI,T_RP_MATCHES_RCVD,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP 1. Choose sgid_index and type from all the matching entries in RDMA-CM based on hint from the IP stack. 2. Set hop_limit for the IP Packet based on above hint from IP stack 3. Define a RDMA_NETWORK enum type. Signed-off-by: Somnath Kotur Signed-off-by: Matan Barak --- drivers/infiniband/core/addr.c | 8 +++++ drivers/infiniband/core/cma.c | 10 +++++- drivers/infiniband/core/verbs.c | 77 ++++++++++++++++++++++------------------- include/rdma/ib_addr.h | 1 + include/rdma/ib_verbs.h | 9 +++++ 5 files changed, 68 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 43af7f5..da24c0e 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -257,6 +257,9 @@ static int addr4_resolve(struct sockaddr_in *src_in, goto put; } + if (rt->rt_uses_gateway) + addr->network = RDMA_NETWORK_IPV4; + ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr); put: ip_rt_put(rt); @@ -271,6 +274,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, { struct flowi6 fl6; struct dst_entry *dst; + struct rt6_info *rt; int ret; memset(&fl6, 0, sizeof fl6); @@ -282,6 +286,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, if ((ret = dst->error)) goto put; + rt = (struct rt6_info *)dst; if (ipv6_addr_any(&fl6.saddr)) { ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, &fl6.daddr, 0, &fl6.saddr); @@ -305,6 +310,9 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, goto put; } + if (rt->rt6i_flags & RTF_GATEWAY) + addr->network = RDMA_NETWORK_IPV6; + ret = dst_fetch_ha(dst, addr, &fl6.daddr); put: dst_release(dst); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 1705280..2bfe798 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1952,6 +1952,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; struct rdma_addr *addr = &route->addr; + enum ib_gid_type network_gid_type; struct cma_work *work; int ret; struct net_device *ndev = NULL; @@ -1990,7 +1991,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, &route->path_rec->dgid); - route->path_rec->hop_limit = 1; + /* Use the hint from IP Stack to select GID Type */ + network_gid_type = ib_network_to_gid_type(addr->dev_addr.network); + if (addr->dev_addr.network != RDMA_NETWORK_IB) { + route->path_rec->gid_type = network_gid_type; + route->path_rec->hop_limit = IPV6_DEFAULT_HOPLIMIT; + } else { + route->path_rec->hop_limit = 1; + } route->path_rec->reversible = 1; route->path_rec->pkey = cpu_to_be16(0xffff); route->path_rec->mtu_selector = IB_SA_EQ; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 2e7ccad..3586996 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -195,11 +195,11 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); -static int ib_get_grh_header_version(const void *h) +static int ib_get_grh_header_version(const union rdma_network_hdr *h) { - const struct iphdr *ip4h = (struct iphdr *)(h + 20); + const struct iphdr *ip4h = (struct iphdr *)&h->roce4grh; struct iphdr ip4h_checked; - const struct ipv6hdr *ip6h = (struct ipv6hdr *)h; + const struct ipv6hdr *ip6h = (struct ipv6hdr *)&h->ibgrh; if (ip6h->version != 6) return (ip4h->version == 4) ? 4 : 0; @@ -219,37 +219,6 @@ static int ib_get_grh_header_version(const void *h) return 6; } -static int ib_get_dgid_sgid_by_grh(const void *h, - enum rdma_network_type net_type, - union ib_gid *dgid, union ib_gid *sgid) -{ - switch (net_type) { - case RDMA_NETWORK_IPV4: { - const struct iphdr *ip4h = (struct iphdr *)(h + 20); - - ipv6_addr_set_v4mapped(ip4h->daddr, (struct in6_addr *)dgid); - ipv6_addr_set_v4mapped(ip4h->saddr, (struct in6_addr *)sgid); - return 0; - } - case RDMA_NETWORK_IPV6: { - struct ipv6hdr *ip6h = (struct ipv6hdr *)h; - - memcpy(dgid, &ip6h->daddr, sizeof(*dgid)); - memcpy(sgid, &ip6h->saddr, sizeof(*sgid)); - return 0; - } - case RDMA_NETWORK_IB: { - struct ib_grh *grh = (struct ib_grh *)h; - - memcpy(dgid, &grh->dgid, sizeof(*dgid)); - memcpy(sgid, &grh->sgid, sizeof(*sgid)); - return 0; - } - } - - return -EINVAL; -} - static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, u8 port_num, const struct ib_grh *grh) @@ -259,7 +228,7 @@ static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND) return RDMA_NETWORK_IB; - grh_version = ib_get_grh_header_version(grh); + grh_version = ib_get_grh_header_version((union rdma_network_hdr *)grh); if (grh_version == 4) return RDMA_NETWORK_IPV4; @@ -305,6 +274,38 @@ static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, &context, gid_index); } +static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr, + enum rdma_network_type net_type, + union ib_gid *sgid, union ib_gid *dgid) +{ + struct sockaddr_in src_in; + struct sockaddr_in dst_in; + __be32 src_saddr, dst_saddr; + + if (!sgid || !dgid) + return -EINVAL; + + if (net_type == RDMA_NETWORK_IPV4) { + memcpy(&src_in.sin_addr.s_addr, + &hdr->roce4grh.saddr, 4); + memcpy(&dst_in.sin_addr.s_addr, + &hdr->roce4grh.daddr, 4); + src_saddr = src_in.sin_addr.s_addr; + dst_saddr = dst_in.sin_addr.s_addr; + ipv6_addr_set_v4mapped(src_saddr, + (struct in6_addr *)sgid); + ipv6_addr_set_v4mapped(dst_saddr, + (struct in6_addr *)dgid); + return 0; + } else if (net_type == RDMA_NETWORK_IPV6 || + net_type == RDMA_NETWORK_IB) { + *dgid = hdr->ibgrh.dgid; + *sgid = hdr->ibgrh.sgid; + return 0; + } else + return -EINVAL; +} + int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, struct ib_grh *grh, struct ib_ah_attr *ah_attr) { @@ -326,7 +327,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, net_type = ib_get_net_type_by_grh(device, port_num, grh); gid_type = ib_network_to_gid_type(net_type); } - ret = ib_get_dgid_sgid_by_grh(grh, net_type, &dgid, &sgid); + ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, + &sgid, &dgid); if (ret) return ret; @@ -1007,6 +1009,9 @@ int ib_resolve_eth_dmac(struct ib_qp *qp, rcu_read_unlock(); goto out; } + if (sgid_attr.gid_type == IB_GID_TYPE_ROCE_V2) + qp_attr->ah_attr.grh.hop_limit = + IPV6_DEFAULT_HOPLIMIT; dev_hold(sgid_attr.ndev); ifindex = sgid_attr.ndev->ifindex; diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 0dfaaa7..80afbf7 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -71,6 +71,7 @@ struct rdma_dev_addr { unsigned short dev_type; int bound_dev_if; enum rdma_transport_type transport; + enum rdma_network_type network; }; /** diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c673d64..ae5d01c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -516,6 +517,14 @@ struct ib_grh { union ib_gid dgid; }; +union rdma_network_hdr { + struct ib_grh ibgrh; + struct { + u8 reserved[20]; + struct iphdr roce4grh; + }; +}; + enum { IB_MULTICAST_QPN = 0xffffff };