diff mbox

[v3,for-next,15/33] IB/Core: Changes to the IB Core infrastructure for RoCEv2 support

Message ID 1d900dec-a791-4383-8f00-3b9b146bed4b@CMEXHTCAS2.ad.emulex.com (mailing list archive)
State Rejected
Headers show

Commit Message

Somnath Kotur March 25, 2015, 9:20 p.m. UTC
1. Choose the sgid_index and GID type from all the matching entries in
   RDMA-CM, based on a hint from the IP stack.
2. Set the hop_limit for the IP packet based on the above hint from the
   IP stack.
3. Define an RDMA_NETWORK enum type.

Signed-off-by: Somnath Kotur <somnath.kotur@emulex.com>
---
 drivers/infiniband/core/addr.c  |  8 +++++
 drivers/infiniband/core/cma.c   | 10 +++++-
 drivers/infiniband/core/verbs.c | 77 ++++++++++++++++++++++-------------------
 include/rdma/ib_addr.h          |  1 +
 include/rdma/ib_verbs.h         |  9 +++++
 5 files changed, 68 insertions(+), 37 deletions(-)
diff mbox

Patch

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 43af7f5..da24c0e 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -257,6 +257,9 @@  static int addr4_resolve(struct sockaddr_in *src_in,
 		goto put;
 	}
 
+	if (rt->rt_uses_gateway)
+		addr->network = RDMA_NETWORK_IPV4;
+
 	ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
 put:
 	ip_rt_put(rt);
@@ -271,6 +274,7 @@  static int addr6_resolve(struct sockaddr_in6 *src_in,
 {
 	struct flowi6 fl6;
 	struct dst_entry *dst;
+	struct rt6_info *rt;
 	int ret;
 
 	memset(&fl6, 0, sizeof fl6);
@@ -282,6 +286,7 @@  static int addr6_resolve(struct sockaddr_in6 *src_in,
 	if ((ret = dst->error))
 		goto put;
 
+	rt = (struct rt6_info *)dst;
 	if (ipv6_addr_any(&fl6.saddr)) {
 		ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
 					 &fl6.daddr, 0, &fl6.saddr);
@@ -305,6 +310,9 @@  static int addr6_resolve(struct sockaddr_in6 *src_in,
 		goto put;
 	}
 
+	if (rt->rt6i_flags & RTF_GATEWAY)
+		addr->network = RDMA_NETWORK_IPV6;
+
 	ret = dst_fetch_ha(dst, addr, &fl6.daddr);
 put:
 	dst_release(dst);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 8dec040..6f345e2 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1952,6 +1952,7 @@  static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 {
 	struct rdma_route *route = &id_priv->id.route;
 	struct rdma_addr *addr = &route->addr;
+	enum ib_gid_type network_gid_type;
 	struct cma_work *work;
 	int ret;
 	struct net_device *ndev = NULL;
@@ -1990,7 +1991,14 @@  static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
 		    &route->path_rec->dgid);
 
-	route->path_rec->hop_limit = 1;
+	/* Use the hint from IP Stack to select GID Type */
+	network_gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+	if (addr->dev_addr.network != RDMA_NETWORK_IB) {
+		route->path_rec->gid_type = network_gid_type;
+		route->path_rec->hop_limit = IPV6_DEFAULT_HOPLIMIT;
+	} else {
+		route->path_rec->hop_limit = 1;
+	}
 	route->path_rec->reversible = 1;
 	route->path_rec->pkey = cpu_to_be16(0xffff);
 	route->path_rec->mtu_selector = IB_SA_EQ;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 2e7ccad..3586996 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -195,11 +195,11 @@  struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 }
 EXPORT_SYMBOL(ib_create_ah);
 
-static int ib_get_grh_header_version(const void *h)
+static int ib_get_grh_header_version(const union rdma_network_hdr *h)
 {
-	const struct iphdr *ip4h = (struct iphdr *)(h + 20);
+	const struct iphdr *ip4h = (struct iphdr *)&h->roce4grh;
 	struct iphdr ip4h_checked;
-	const struct ipv6hdr *ip6h = (struct ipv6hdr *)h;
+	const struct ipv6hdr *ip6h = (struct ipv6hdr *)&h->ibgrh;
 
 	if (ip6h->version != 6)
 		return (ip4h->version == 4) ? 4 : 0;
@@ -219,37 +219,6 @@  static int ib_get_grh_header_version(const void *h)
 	return 6;
 }
 
-static int ib_get_dgid_sgid_by_grh(const void *h,
-				   enum rdma_network_type net_type,
-				   union ib_gid *dgid, union ib_gid *sgid)
-{
-	switch (net_type) {
-	case RDMA_NETWORK_IPV4: {
-		const struct iphdr *ip4h = (struct iphdr *)(h + 20);
-
-		ipv6_addr_set_v4mapped(ip4h->daddr, (struct in6_addr *)dgid);
-		ipv6_addr_set_v4mapped(ip4h->saddr, (struct in6_addr *)sgid);
-		return 0;
-	}
-	case RDMA_NETWORK_IPV6: {
-		struct ipv6hdr *ip6h = (struct ipv6hdr *)h;
-
-		memcpy(dgid, &ip6h->daddr, sizeof(*dgid));
-		memcpy(sgid, &ip6h->saddr, sizeof(*sgid));
-		return 0;
-	}
-	case RDMA_NETWORK_IB: {
-		struct ib_grh *grh = (struct ib_grh *)h;
-
-		memcpy(dgid, &grh->dgid, sizeof(*dgid));
-		memcpy(sgid, &grh->sgid, sizeof(*sgid));
-		return 0;
-	}
-	}
-
-	return -EINVAL;
-}
-
 static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
 						     u8 port_num,
 						     const struct ib_grh *grh)
@@ -259,7 +228,7 @@  static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
 	if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND)
 		return RDMA_NETWORK_IB;
 
-	grh_version = ib_get_grh_header_version(grh);
+	grh_version = ib_get_grh_header_version((union rdma_network_hdr *)grh);
 
 	if (grh_version == 4)
 		return RDMA_NETWORK_IPV4;
@@ -305,6 +274,38 @@  static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
 				     &context, gid_index);
 }
 
+static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
+				  enum rdma_network_type net_type,
+				  union ib_gid *sgid, union ib_gid *dgid)
+{
+	struct sockaddr_in  src_in;
+	struct sockaddr_in  dst_in;
+	__be32 src_saddr, dst_saddr;
+
+	if (!sgid || !dgid)
+		return -EINVAL;
+
+	if (net_type == RDMA_NETWORK_IPV4) {
+		memcpy(&src_in.sin_addr.s_addr,
+		       &hdr->roce4grh.saddr, 4);
+		memcpy(&dst_in.sin_addr.s_addr,
+		       &hdr->roce4grh.daddr, 4);
+		src_saddr = src_in.sin_addr.s_addr;
+		dst_saddr = dst_in.sin_addr.s_addr;
+		ipv6_addr_set_v4mapped(src_saddr,
+				       (struct in6_addr *)sgid);
+		ipv6_addr_set_v4mapped(dst_saddr,
+				       (struct in6_addr *)dgid);
+		return 0;
+	} else if (net_type == RDMA_NETWORK_IPV6 ||
+		   net_type == RDMA_NETWORK_IB) {
+		*dgid = hdr->ibgrh.dgid;
+		*sgid = hdr->ibgrh.sgid;
+		return 0;
+	} else
+		return -EINVAL;
+}
+
 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
 		       struct ib_grh *grh, struct ib_ah_attr *ah_attr)
 {
@@ -326,7 +327,8 @@  int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
 			net_type = ib_get_net_type_by_grh(device, port_num, grh);
 		gid_type = ib_network_to_gid_type(net_type);
 	}
-	ret = ib_get_dgid_sgid_by_grh(grh, net_type, &dgid, &sgid);
+	ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+				     &sgid, &dgid);
 	if (ret)
 		return ret;
 
@@ -1007,6 +1009,9 @@  int ib_resolve_eth_dmac(struct ib_qp *qp,
 				rcu_read_unlock();
 				goto out;
 			}
+			if (sgid_attr.gid_type == IB_GID_TYPE_ROCE_V2)
+				qp_attr->ah_attr.grh.hop_limit =
+							IPV6_DEFAULT_HOPLIMIT;
 
 			dev_hold(sgid_attr.ndev);
 			ifindex = sgid_attr.ndev->ifindex;
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 0dfaaa7..80afbf7 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -71,6 +71,7 @@  struct rdma_dev_addr {
 	unsigned short dev_type;
 	int bound_dev_if;
 	enum rdma_transport_type transport;
+	enum rdma_network_type network;
 };
 
 /**
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9de9e62..846db44 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -51,6 +51,7 @@ 
 #include <net/net_namespace.h>
 #include <uapi/linux/if_ether.h>
 #include <net/ipv6.h>
+#include <net/ip.h>
 
 #include <linux/atomic.h>
 #include <linux/mmu_notifier.h>
@@ -517,6 +518,14 @@  struct ib_grh {
 	union ib_gid	dgid;
 };
 
+union rdma_network_hdr {
+	struct ib_grh ibgrh;
+	struct {
+		u8		reserved[20];
+		struct iphdr	roce4grh;
+	};
+};
+
 enum {
 	IB_MULTICAST_QPN = 0xffffff
 };