@@ -257,6 +257,9 @@ static int addr4_resolve(struct sockaddr_in *src_in,
goto put;
}
+ if (rt->rt_uses_gateway)
+ addr->network = RDMA_NETWORK_IPV4;
+
ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
put:
ip_rt_put(rt);
@@ -271,6 +274,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
{
struct flowi6 fl6;
struct dst_entry *dst;
+ struct rt6_info *rt;
int ret;
memset(&fl6, 0, sizeof fl6);
@@ -282,6 +286,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
if ((ret = dst->error))
goto put;
+ rt = (struct rt6_info *)dst;
if (ipv6_addr_any(&fl6.saddr)) {
ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
&fl6.daddr, 0, &fl6.saddr);
@@ -305,6 +310,9 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
goto put;
}
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ addr->network = RDMA_NETWORK_IPV6;
+
ret = dst_fetch_ha(dst, addr, &fl6.daddr);
put:
dst_release(dst);
@@ -1952,6 +1952,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
{
struct rdma_route *route = &id_priv->id.route;
struct rdma_addr *addr = &route->addr;
+ enum ib_gid_type network_gid_type;
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
@@ -1990,7 +1991,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
- route->path_rec->hop_limit = 1;
+ /* Use the hint from IP Stack to select GID Type */
+ network_gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ if (addr->dev_addr.network != RDMA_NETWORK_IB) {
+ route->path_rec->gid_type = network_gid_type;
+ route->path_rec->hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ } else {
+ route->path_rec->hop_limit = 1;
+ }
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
@@ -195,11 +195,11 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
EXPORT_SYMBOL(ib_create_ah);
-static int ib_get_grh_header_version(const void *h)
+static int ib_get_grh_header_version(const union rdma_network_hdr *h)
{
- const struct iphdr *ip4h = (struct iphdr *)(h + 20);
+ const struct iphdr *ip4h = (struct iphdr *)&h->roce4grh;
struct iphdr ip4h_checked;
- const struct ipv6hdr *ip6h = (struct ipv6hdr *)h;
+ const struct ipv6hdr *ip6h = (struct ipv6hdr *)&h->ibgrh;
if (ip6h->version != 6)
return (ip4h->version == 4) ? 4 : 0;
@@ -219,37 +219,6 @@ static int ib_get_grh_header_version(const void *h)
return 6;
}
-static int ib_get_dgid_sgid_by_grh(const void *h,
- enum rdma_network_type net_type,
- union ib_gid *dgid, union ib_gid *sgid)
-{
- switch (net_type) {
- case RDMA_NETWORK_IPV4: {
- const struct iphdr *ip4h = (struct iphdr *)(h + 20);
-
- ipv6_addr_set_v4mapped(ip4h->daddr, (struct in6_addr *)dgid);
- ipv6_addr_set_v4mapped(ip4h->saddr, (struct in6_addr *)sgid);
- return 0;
- }
- case RDMA_NETWORK_IPV6: {
- struct ipv6hdr *ip6h = (struct ipv6hdr *)h;
-
- memcpy(dgid, &ip6h->daddr, sizeof(*dgid));
- memcpy(sgid, &ip6h->saddr, sizeof(*sgid));
- return 0;
- }
- case RDMA_NETWORK_IB: {
- struct ib_grh *grh = (struct ib_grh *)h;
-
- memcpy(dgid, &grh->dgid, sizeof(*dgid));
- memcpy(sgid, &grh->sgid, sizeof(*sgid));
- return 0;
- }
- }
-
- return -EINVAL;
-}
-
static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
u8 port_num,
const struct ib_grh *grh)
@@ -259,7 +228,7 @@ static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
if (rdma_port_get_link_layer(device, port_num) == IB_LINK_LAYER_INFINIBAND)
return RDMA_NETWORK_IB;
- grh_version = ib_get_grh_header_version(grh);
+ grh_version = ib_get_grh_header_version((union rdma_network_hdr *)grh);
if (grh_version == 4)
return RDMA_NETWORK_IPV4;
@@ -305,6 +274,38 @@ static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
&context, gid_index);
}
+/*
+ * get_gids_from_rdma_hdr - pull the source and destination GIDs out of a
+ * received network header.
+ * @hdr:      header bytes, read either through the ibgrh view or through the
+ *            roce4grh (IPv4 header) view of the union
+ * @net_type: selects which view of @hdr is read
+ * @sgid:     filled with the source GID
+ * @dgid:     filled with the destination GID
+ *
+ * For RDMA_NETWORK_IPV4 the IPv4 source/destination addresses are turned
+ * into GIDs with ipv6_addr_set_v4mapped().  For RDMA_NETWORK_IPV6 and
+ * RDMA_NETWORK_IB the GIDs are copied unchanged from the ibgrh view.
+ *
+ * Returns 0 on success, or -EINVAL when @sgid or @dgid is NULL or when
+ * @net_type is none of the three values above.
+ */
+static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
+				  enum rdma_network_type net_type,
+				  union ib_gid *sgid, union ib_gid *dgid)
+{
+	__be32 v4_src, v4_dst;
+
+	if (!sgid || !dgid)
+		return -EINVAL;
+
+	switch (net_type) {
+	case RDMA_NETWORK_IPV4:
+		/* Stage both addresses in locals, then map each into its GID. */
+		memcpy(&v4_src, &hdr->roce4grh.saddr, sizeof(v4_src));
+		memcpy(&v4_dst, &hdr->roce4grh.daddr, sizeof(v4_dst));
+		ipv6_addr_set_v4mapped(v4_src, (struct in6_addr *)sgid);
+		ipv6_addr_set_v4mapped(v4_dst, (struct in6_addr *)dgid);
+		return 0;
+	case RDMA_NETWORK_IPV6:
+	case RDMA_NETWORK_IB:
+		/* Both layouts keep the GIDs where the ibgrh view has them. */
+		*dgid = hdr->ibgrh.dgid;
+		*sgid = hdr->ibgrh.sgid;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
struct ib_grh *grh, struct ib_ah_attr *ah_attr)
{
@@ -326,7 +327,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
net_type = ib_get_net_type_by_grh(device, port_num, grh);
gid_type = ib_network_to_gid_type(net_type);
}
- ret = ib_get_dgid_sgid_by_grh(grh, net_type, &dgid, &sgid);
+ ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+ &sgid, &dgid);
if (ret)
return ret;
@@ -1007,6 +1009,9 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
rcu_read_unlock();
goto out;
}
+ if (sgid_attr.gid_type == IB_GID_TYPE_ROCE_V2)
+ qp_attr->ah_attr.grh.hop_limit =
+ IPV6_DEFAULT_HOPLIMIT;
dev_hold(sgid_attr.ndev);
ifindex = sgid_attr.ndev->ifindex;
@@ -71,6 +71,7 @@ struct rdma_dev_addr {
unsigned short dev_type;
int bound_dev_if;
enum rdma_transport_type transport;
+ enum rdma_network_type network;
};
/**
@@ -51,6 +51,7 @@
#include <net/net_namespace.h>
#include <uapi/linux/if_ether.h>
#include <net/ipv6.h>
+#include <net/ip.h>
#include <linux/atomic.h>
#include <linux/mmu_notifier.h>
@@ -517,6 +518,14 @@ struct ib_grh {
union ib_gid dgid;
};
+/*
+ * Two overlapping views of the same header bytes: an IB GRH (ibgrh), or an
+ * IPv4 header (roce4grh) that begins 20 bytes into the buffer.
+ * NOTE(review): presumably the 20-byte pad makes the IPv4 header end where
+ * the GRH ends - confirm against sizeof(struct ib_grh) and struct iphdr.
+ */
+union rdma_network_hdr {
+ struct ib_grh ibgrh;
+ struct {
+ u8 reserved[20]; /* bytes in front of the IPv4 header, not interpreted here */
+ struct iphdr roce4grh;
+ };
+};
+
enum {
IB_MULTICAST_QPN = 0xffffff
};
1. Choose sgid_index and type from all the matching entries in RDMA-CM
   based on the hint from the IP stack.
2. Set hop_limit for the IP packet based on the above hint from the IP stack.
3. Define an RDMA_NETWORK enum type.

Signed-off-by: Somnath Kotur <somnath.kotur@emulex.com>
---
 drivers/infiniband/core/addr.c  |  8 +++++
 drivers/infiniband/core/cma.c   | 10 +++++-
 drivers/infiniband/core/verbs.c | 77 ++++++++++++++++++++++-------------------
 include/rdma/ib_addr.h          |  1 +
 include/rdma/ib_verbs.h         |  9 +++++
 5 files changed, 68 insertions(+), 37 deletions(-)