From patchwork Thu Dec 10 21:14:55 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Eli Cohen X-Patchwork-Id: 66353 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id nBALEVAC027104 for ; Thu, 10 Dec 2009 21:14:32 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1761698AbZLJVOH (ORCPT ); Thu, 10 Dec 2009 16:14:07 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1761704AbZLJVOH (ORCPT ); Thu, 10 Dec 2009 16:14:07 -0500 Received: from mail.mellanox.co.il ([194.90.237.43]:36672 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1761698AbZLJVOE (ORCPT ); Thu, 10 Dec 2009 16:14:04 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from eli@dev.mellanox.co.il) with SMTP; 10 Dec 2009 23:22:32 +0200 Received: from localhost ([10.4.1.30]) by mtlexch01.mtl.com with Microsoft SMTPSVC(6.0.3790.3959); Thu, 10 Dec 2009 23:13:24 +0200 Date: Thu, 10 Dec 2009 23:14:55 +0200 From: Eli Cohen To: Jason Gunthorpe Cc: Eli Cohen , Linux RDMA list , Roland Dreier , ewg Subject: Re: [ewg] Re: [PATCH] rdmaoe/libibverbs: handle binary compatibility Message-ID: <20091210211455.GA5166@mtls03> References: <20091210170536.GA3229@mtls03> <20091210173353.GW1966@obsidianresearch.com> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: <20091210173353.GW1966@obsidianresearch.com> User-Agent: Mutt/1.5.20 (2009-06-14) X-OriginalArrivalTime: 10 Dec 2009 21:13:24.0895 (UTC) FILETIME=[9C3542F0:01CA79DD] X-TM-AS-Product-Ver: SMEX-8.0.0.1181-6.000.1038-17062.002 X-TM-AS-Result: No--13.069300-8.000000-31 X-TM-AS-User-Approved-Sender: No X-TM-AS-User-Blocked-Sender: No Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org diff --git a/examples/devinfo.c b/examples/devinfo.c index 84f95c7..393ec04 100644 --- 
a/examples/devinfo.c +++ b/examples/devinfo.c @@ -184,6 +184,19 @@ static int print_all_port_gids(struct ibv_context *ctx, uint8_t port_num, int tb return rc; } +static const char *link_layer_str(uint8_t link_layer) +{ + switch (link_layer) { + case IBV_LINK_LAYER_UNSPECIFIED: + case IBV_LINK_LAYER_INFINIBAND: + return "IB"; + case IBV_LINK_LAYER_ETHERNET: + return "Ethernet"; + default: + return "Unknown"; + } +} + static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port) { struct ibv_context *ctx; @@ -284,6 +297,7 @@ static int print_hca_cap(struct ibv_device *ib_dev, uint8_t ib_port) printf("\t\t\tsm_lid:\t\t\t%d\n", port_attr.sm_lid); printf("\t\t\tport_lid:\t\t%d\n", port_attr.lid); printf("\t\t\tport_lmc:\t\t0x%02x\n", port_attr.lmc); + printf("\t\t\tlink_layer:\t\t%s\n", link_layer_str(port_attr.link_layer)); if (verbose) { printf("\t\t\tmax_msg_sz:\t\t0x%x\n", port_attr.max_msg_sz); diff --git a/examples/pingpong.c b/examples/pingpong.c index b916f59..d4a46e4 100644 --- a/examples/pingpong.c +++ b/examples/pingpong.c @@ -31,6 +31,8 @@ */ #include "pingpong.h" +#include +#include enum ibv_mtu pp_mtu_to_enum(int mtu) { @@ -53,3 +55,10 @@ uint16_t pp_get_local_lid(struct ibv_context *context, int port) return attr.lid; } + +int pp_get_port_info(struct ibv_context *context, int port, + struct ibv_port_attr *attr) +{ + return ibv_query_port(context, port, attr); +} + diff --git a/examples/pingpong.h b/examples/pingpong.h index 71d7c3f..16d3466 100644 --- a/examples/pingpong.h +++ b/examples/pingpong.h @@ -37,5 +37,7 @@ enum ibv_mtu pp_mtu_to_enum(int mtu); uint16_t pp_get_local_lid(struct ibv_context *context, int port); +int pp_get_port_info(struct ibv_context *context, int port, + struct ibv_port_attr *attr); #endif /* IBV_PINGPONG_H */ diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c index fa969e0..4d0bd0d 100644 --- a/examples/rc_pingpong.c +++ b/examples/rc_pingpong.c @@ -67,6 +67,8 @@ struct pingpong_context { int size; int rx_depth; 
int pending; + struct ibv_port_attr portinfo; + union ibv_gid dgid; }; struct pingpong_dest { @@ -94,6 +96,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, .port_num = port } }; + + if (ctx->dgid.global.interface_id) { + attr.ah_attr.is_global = 1; + attr.ah_attr.grh.hop_limit = 1; + attr.ah_attr.grh.dgid = ctx->dgid; + } if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_AV | @@ -289,11 +297,11 @@ out: static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, int rx_depth, int port, - int use_event) + int use_event, int is_server) { struct pingpong_context *ctx; - ctx = malloc(sizeof *ctx); + ctx = calloc(1, sizeof *ctx); if (!ctx) return NULL; @@ -306,7 +314,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, return NULL; } - memset(ctx->buf, 0, size); + memset(ctx->buf, 0x7b + is_server, size); ctx->context = ibv_open_device(ib_dev); if (!ctx->context) { @@ -481,6 +489,7 @@ static void usage(const char *argv0) printf(" -n, --iters= number of exchanges (default 1000)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); + printf(" -g, --gid= gid of the other port\n"); } int main(int argc, char *argv[]) @@ -504,6 +513,7 @@ int main(int argc, char *argv[]) int rcnt, scnt; int num_cq_events = 0; int sl = 0; + char *grh = NULL; srand48(getpid() * time(NULL)); @@ -520,10 +530,11 @@ int main(int argc, char *argv[]) { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, + { .name = "gid", .has_arg = 1, .val = 'g' }, { 0 } }; - c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:e", long_options, NULL); + c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", long_options, NULL); if (c == -1) break; @@ -576,6 +587,10 @@ int main(int argc, char *argv[]) ++use_event; break; + case 'g': + grh = strdupa(optarg); + break; + default: usage(argv[0]); 
return 1; @@ -615,7 +630,7 @@ int main(int argc, char *argv[]) } } - ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event); + ctx = pp_init_ctx(ib_dev, size, rx_depth, ib_port, use_event, !servername); if (!ctx) return 1; @@ -631,17 +646,31 @@ int main(int argc, char *argv[]) return 1; } - my_dest.lid = pp_get_local_lid(ctx->context, ib_port); - my_dest.qpn = ctx->qp->qp_num; - my_dest.psn = lrand48() & 0xffffff; - if (!my_dest.lid) { - fprintf(stderr, "Couldn't get local LID\n"); + + if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { + fprintf(stderr, "Couldn't get port info\n"); return 1; } + my_dest.lid = ctx->portinfo.lid; + if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) { + if (!grh) { + fprintf(stderr, "Must supply remote gid\n"); + return 1; + } + inet_pton(AF_INET6, grh, &ctx->dgid); + } else { + if (!my_dest.lid) { + fprintf(stderr, "Couldn't get local LID\n"); + return 1; + } + } + my_dest.qpn = ctx->qp->qp_num; + my_dest.psn = lrand48() & 0xffffff; printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", my_dest.lid, my_dest.qpn, my_dest.psn); + if (servername) rem_dest = pp_client_exch_dest(servername, port, &my_dest); else @@ -706,6 +735,7 @@ int main(int argc, char *argv[]) fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } + } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { diff --git a/examples/srq_pingpong.c b/examples/srq_pingpong.c index 1e36c57..eda9013 100644 --- a/examples/srq_pingpong.c +++ b/examples/srq_pingpong.c @@ -71,6 +71,8 @@ struct pingpong_context { int num_qp; int rx_depth; int pending[MAX_QP]; + struct ibv_port_attr portinfo; + union ibv_gid dgid; }; struct pingpong_dest { @@ -101,6 +103,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, enum ibv_mtu m .port_num = port } }; + + if (ctx->dgid.global.interface_id) { + attr.ah_attr.is_global = 1; + attr.ah_attr.grh.hop_limit = 1; + attr.ah_attr.grh.dgid = ctx->dgid; + } if (ibv_modify_qp(ctx->qp[i], &attr, 
IBV_QP_STATE | IBV_QP_AV | @@ -327,7 +335,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, struct pingpong_context *ctx; int i; - ctx = malloc(sizeof *ctx); + ctx = calloc(1, sizeof *ctx); if (!ctx) return NULL; @@ -551,6 +559,7 @@ static void usage(const char *argv0) printf(" -n, --iters= number of exchanges per QP(default 1000)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); + printf(" -g, --gid= gid of the other port\n"); } int main(int argc, char *argv[]) @@ -578,6 +587,7 @@ int main(int argc, char *argv[]) int i; int num_cq_events = 0; int sl = 0; + char *grh = NULL; srand48(getpid() * time(NULL)); @@ -595,10 +605,11 @@ int main(int argc, char *argv[]) { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, + { .name = "gid", .has_arg = 1, .val = 'g' }, { 0 } }; - c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:e", long_options, NULL); + c = getopt_long(argc, argv, "p:d:i:s:m:q:r:n:l:eg:", long_options, NULL); if (c == -1) break; @@ -655,6 +666,10 @@ int main(int argc, char *argv[]) ++use_event; break; + case 'g': + grh = strdupa(optarg); + break; + default: usage(argv[0]); return 1; @@ -722,13 +737,25 @@ int main(int argc, char *argv[]) memset(my_dest, 0, sizeof my_dest); + if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { + fprintf(stderr, "Couldn't get port info\n"); + return 1; + } for (i = 0; i < num_qp; ++i) { my_dest[i].qpn = ctx->qp[i]->qp_num; my_dest[i].psn = lrand48() & 0xffffff; - my_dest[i].lid = pp_get_local_lid(ctx->context, ib_port); - if (!my_dest[i].lid) { - fprintf(stderr, "Couldn't get local LID\n"); - return 1; + if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) { + if (!grh) { + fprintf(stderr, "Must supply remote gid\n"); + return 1; + } + inet_pton(AF_INET6, grh, &ctx->dgid); + } else { + my_dest[i].lid = ctx->portinfo.lid; + if 
(!my_dest[i].lid) { + fprintf(stderr, "Couldn't get local LID\n"); + return 1; + } } printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", diff --git a/examples/uc_pingpong.c b/examples/uc_pingpong.c index 6f31247..2bc7da5 100644 --- a/examples/uc_pingpong.c +++ b/examples/uc_pingpong.c @@ -67,6 +67,8 @@ struct pingpong_context { int size; int rx_depth; int pending; + struct ibv_port_attr portinfo; + union ibv_gid dgid; }; struct pingpong_dest { @@ -92,6 +94,13 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, .port_num = port } }; + + if (ctx->dgid.global.interface_id) { + attr.ah_attr.is_global = 1; + attr.ah_attr.grh.hop_limit = 1; + attr.ah_attr.grh.dgid = ctx->dgid; + } + if (ibv_modify_qp(ctx->qp, &attr, IBV_QP_STATE | IBV_QP_AV | @@ -281,7 +290,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, { struct pingpong_context *ctx; - ctx = malloc(sizeof *ctx); + ctx = calloc(1, sizeof *ctx); if (!ctx) return NULL; @@ -469,6 +478,7 @@ static void usage(const char *argv0) printf(" -n, --iters= number of exchanges (default 1000)\n"); printf(" -l, --sl= service level value\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); + printf(" -g, --gid= gid of the other port\n"); } int main(int argc, char *argv[]) @@ -492,6 +502,7 @@ int main(int argc, char *argv[]) int rcnt, scnt; int num_cq_events = 0; int sl = 0; + char *grh = NULL; srand48(getpid() * time(NULL)); @@ -508,10 +519,11 @@ int main(int argc, char *argv[]) { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, + { .name = "gid", .has_arg = 1, .val = 'g' }, { 0 } }; - c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:e", long_options, NULL); + c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:", long_options, NULL); if (c == -1) break; @@ -564,6 +576,10 @@ int main(int argc, char *argv[]) ++use_event; break; + case 'g': + grh = strdupa(optarg); + 
break; + default: usage(argv[0]); return 1; @@ -619,14 +635,27 @@ int main(int argc, char *argv[]) return 1; } - my_dest.lid = pp_get_local_lid(ctx->context, ib_port); - my_dest.qpn = ctx->qp->qp_num; - my_dest.psn = lrand48() & 0xffffff; - if (!my_dest.lid) { - fprintf(stderr, "Couldn't get local LID\n"); + if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { + fprintf(stderr, "Couldn't get port info\n"); return 1; } + my_dest.lid = ctx->portinfo.lid; + if (ctx->portinfo.link_layer == IBV_LINK_LAYER_ETHERNET) { + if (!grh) { + fprintf(stderr, "Must supply remote gid\n"); + return 1; + } + inet_pton(AF_INET6, grh, &ctx->dgid); + } else { + if (!my_dest.lid) { + fprintf(stderr, "Couldn't get local LID\n"); + return 1; + } + } + my_dest.qpn = ctx->qp->qp_num; + my_dest.psn = lrand48() & 0xffffff; + printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", my_dest.lid, my_dest.qpn, my_dest.psn); diff --git a/examples/ud_pingpong.c b/examples/ud_pingpong.c index 6f10212..e30d6d6 100644 --- a/examples/ud_pingpong.c +++ b/examples/ud_pingpong.c @@ -68,6 +68,8 @@ struct pingpong_context { int size; int rx_depth; int pending; + struct ibv_port_attr portinfo; + union ibv_gid dgid; }; struct pingpong_dest { @@ -105,6 +107,12 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, return 1; } + if (ctx->dgid.global.interface_id) { + ah_attr.is_global = 1; + ah_attr.grh.hop_limit = 1; + ah_attr.grh.dgid = ctx->dgid; + } + ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); if (!ctx->ah) { fprintf(stderr, "Failed to create AH\n"); @@ -478,6 +486,7 @@ static void usage(const char *argv0) printf(" -r, --rx-depth= number of receives to post at a time (default 500)\n"); printf(" -n, --iters= number of exchanges (default 1000)\n"); printf(" -e, --events sleep on CQ events (default poll)\n"); + printf(" -g, --gid specify remote gid\n"); } int main(int argc, char *argv[]) @@ -500,6 +509,7 @@ int main(int argc, char *argv[]) int rcnt, scnt; int 
num_cq_events = 0; int sl = 0; + char *gid = NULL; srand48(getpid() * time(NULL)); @@ -515,10 +525,11 @@ int main(int argc, char *argv[]) { .name = "iters", .has_arg = 1, .val = 'n' }, { .name = "sl", .has_arg = 1, .val = 'l' }, { .name = "events", .has_arg = 0, .val = 'e' }, + { .name = "gid", .has_arg = 1, .val = 'g' }, { 0 } }; - c = getopt_long(argc, argv, "p:d:i:s:r:n:l:e", long_options, NULL); + c = getopt_long(argc, argv, "p:d:i:s:r:n:l:eg:", long_options, NULL); if (c == -1) break; @@ -563,6 +574,10 @@ int main(int argc, char *argv[]) ++use_event; break; + case 'g': + gid = strdupa(optarg); + break; + default: usage(argv[0]); return 1; @@ -618,12 +633,25 @@ int main(int argc, char *argv[]) return 1; } - my_dest.lid = pp_get_local_lid(ctx->context, ib_port); + if (pp_get_port_info(ctx->context, ib_port, &ctx->portinfo)) { + fprintf(stderr, "Couldn't get port info\n"); + return 1; + } + my_dest.lid = ctx->portinfo.lid; + my_dest.qpn = ctx->qp->qp_num; my_dest.psn = lrand48() & 0xffffff; - if (!my_dest.lid) { - fprintf(stderr, "Couldn't get local LID\n"); - return 1; + if (ctx->portinfo.link_layer != IBV_LINK_LAYER_ETHERNET) { + if (!my_dest.lid) { + fprintf(stderr, "Couldn't get local LID\n"); + return 1; + } + } else { + if (!gid) { + fprintf(stderr, "must specify remote GID\n"); + return 1; + } + inet_pton(AF_INET6, gid, &ctx->dgid); } printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n", diff --git a/include/infiniband/driver.h b/include/infiniband/driver.h index 9a81416..8d7c2c6 100644 --- a/include/infiniband/driver.h +++ b/include/infiniband/driver.h @@ -131,6 +131,7 @@ int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah, int ibv_cmd_destroy_ah(struct ibv_ah *ah); int ibv_cmd_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); +int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac); int 
ibv_dontfork_range(void *base, size_t size); int ibv_dofork_range(void *base, size_t size); diff --git a/include/infiniband/kern-abi.h b/include/infiniband/kern-abi.h index 0db083a..8ef8844 100644 --- a/include/infiniband/kern-abi.h +++ b/include/infiniband/kern-abi.h @@ -46,7 +46,7 @@ * The minimum and maximum kernel ABI that we can handle. */ #define IB_USER_VERBS_MIN_ABI_VERSION 1 -#define IB_USER_VERBS_MAX_ABI_VERSION 6 +#define IB_USER_VERBS_MAX_ABI_VERSION 7 enum { IB_USER_VERBS_CMD_GET_CONTEXT, @@ -85,7 +85,8 @@ enum { IB_USER_VERBS_CMD_MODIFY_SRQ, IB_USER_VERBS_CMD_QUERY_SRQ, IB_USER_VERBS_CMD_DESTROY_SRQ, - IB_USER_VERBS_CMD_POST_SRQ_RECV + IB_USER_VERBS_CMD_POST_SRQ_RECV, + IB_USER_VERBS_CMD_GET_MAC }; /* @@ -223,7 +224,8 @@ struct ibv_query_port_resp { __u8 active_width; __u8 active_speed; __u8 phys_state; - __u8 reserved[3]; + __u8 link_layer; + __u8 reserved[2]; }; struct ibv_alloc_pd { @@ -798,6 +800,7 @@ enum { IB_USER_VERBS_CMD_QUERY_SRQ_V2, IB_USER_VERBS_CMD_DESTROY_SRQ_V2, IB_USER_VERBS_CMD_POST_SRQ_RECV_V2, + IB_USER_VERBS_CMD_GET_MAC_V2 = -1, /* * Set commands that didn't exist to -1 so our compile-time * trick opcodes in IBV_INIT_CMD() doesn't break. 
@@ -878,4 +881,20 @@ struct ibv_create_srq_resp_v5 { __u32 srq_handle; }; +struct ibv_get_mac { + __u32 command; + __u16 in_words; + __u16 out_words; + __u64 response; + __u32 pd_handle; + __u8 port; + __u8 reserved[3]; + __u8 dgid[16]; +}; + +struct ibv_get_mac_resp { + __u8 mac[6]; + __u16 reserved; +}; + #endif /* KERN_ABI_H */ diff --git a/include/infiniband/verbs.h b/include/infiniband/verbs.h index 0f1cb2e..ecdbc6f 100644 --- a/include/infiniband/verbs.h +++ b/include/infiniband/verbs.h @@ -38,6 +38,7 @@ #include #include +#include #ifdef __cplusplus # define BEGIN_C_DECLS extern "C" { @@ -61,6 +62,7 @@ union ibv_gid { uint64_t subnet_prefix; uint64_t interface_id; } global; + uint32_t dwords[4]; }; enum ibv_node_type { @@ -161,6 +163,12 @@ enum ibv_port_state { IBV_PORT_ACTIVE_DEFER = 5 }; +enum { + IBV_LINK_LAYER_UNSPECIFIED, + IBV_LINK_LAYER_INFINIBAND, + IBV_LINK_LAYER_ETHERNET, +}; + struct ibv_port_attr { enum ibv_port_state state; enum ibv_mtu max_mtu; @@ -181,6 +189,8 @@ struct ibv_port_attr { uint8_t active_width; uint8_t active_speed; uint8_t phys_state; + uint8_t link_layer; + uint8_t pad; }; enum ibv_event_type { @@ -693,6 +703,16 @@ struct ibv_context { void *abi_compat; }; +static inline int ___ibv_query_port(struct ibv_context *context, + uint8_t port_num, + struct ibv_port_attr *port_attr) +{ + port_attr->link_layer = IBV_LINK_LAYER_UNSPECIFIED; + port_attr->pad = 0; + + return context->ops.query_port(context, port_num, port_attr); +} + /** * ibv_get_device_list - Get list of IB devices currently available * @num_devices: optional. 
if non-NULL, set to the number of devices @@ -1097,4 +1117,7 @@ END_C_DECLS # undef __attribute_const +#define ibv_query_port(context, port_num, port_attr) \ + ___ibv_query_port(context, port_num, port_attr) + #endif /* INFINIBAND_VERBS_H */ diff --git a/src/cmd.c b/src/cmd.c index cbd5288..5183d59 100644 --- a/src/cmd.c +++ b/src/cmd.c @@ -162,6 +162,7 @@ int ibv_cmd_query_device(struct ibv_context *context, return 0; } +#include int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr, struct ibv_query_port *cmd, size_t cmd_size) @@ -196,6 +197,7 @@ int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num, port_attr->active_width = resp.active_width; port_attr->active_speed = resp.active_speed; port_attr->phys_state = resp.phys_state; + port_attr->link_layer = resp.link_layer; return 0; } @@ -1122,3 +1124,22 @@ int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t l return 0; } + +int ibv_cmd_get_mac(struct ibv_pd *pd, uint8_t port, uint8_t *gid, uint8_t *mac) +{ + struct ibv_get_mac cmd; + struct ibv_get_mac_resp resp; + + IBV_INIT_CMD_RESP(&cmd, sizeof cmd, GET_MAC, &resp, sizeof resp); + memcpy(cmd.dgid, gid, sizeof cmd.dgid); + cmd.pd_handle = pd->handle; + cmd.port = port; + + if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) + return errno; + + memcpy(mac, resp.mac, 6); + + return 0; +} + diff --git a/src/libibverbs.map b/src/libibverbs.map index 1827da0..1688e73 100644 --- a/src/libibverbs.map +++ b/src/libibverbs.map @@ -64,6 +64,7 @@ IBVERBS_1.0 { ibv_cmd_destroy_ah; ibv_cmd_attach_mcast; ibv_cmd_detach_mcast; + ibv_cmd_get_mac; ibv_copy_qp_attr_from_kern; ibv_copy_path_rec_from_kern; ibv_copy_path_rec_to_kern; diff --git a/src/verbs.c b/src/verbs.c index ba3c0a4..2b175b6 100644 --- a/src/verbs.c +++ b/src/verbs.c @@ -86,6 +86,7 @@ default_symver(__ibv_query_device, ibv_query_device); int __ibv_query_port(struct ibv_context *context, uint8_t port_num, struct 
ibv_port_attr *port_attr) { + port_attr->link_layer = IBV_LINK_LAYER_UNSPECIFIED; return context->ops.query_port(context, port_num, port_attr); } default_symver(__ibv_query_port, ibv_query_port); libmlx4: diff --git a/src/mlx4.h b/src/mlx4.h index 4445998..661255b 100644 --- a/src/mlx4.h +++ b/src/mlx4.h @@ -236,11 +236,14 @@ struct mlx4_av { uint8_t hop_limit; uint32_t sl_tclass_flowlabel; uint8_t dgid[16]; + uint8_t mac[8]; }; struct mlx4_ah { struct ibv_ah ibv_ah; struct mlx4_av av; + uint16_t vlan; + uint8_t mac[6]; }; static inline unsigned long align(unsigned long val, unsigned long align) diff --git a/src/qp.c b/src/qp.c index d194ae3..cd8fab0 100644 --- a/src/qp.c +++ b/src/qp.c @@ -143,6 +143,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); dseg->dqpn = htonl(wr->wr.ud.remote_qpn); dseg->qkey = htonl(wr->wr.ud.remote_qkey); + dseg->vlan = htons(to_mah(wr->wr.ud.ah)->vlan); + memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->mac, 6); } static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg) diff --git a/src/verbs.c b/src/verbs.c index 1ac1362..667ef68 100644 --- a/src/verbs.c +++ b/src/verbs.c @@ -614,9 +614,21 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp) return 0; } +static int mcast_mac(uint8_t *mac) +{ + int i; + uint8_t val = 0xff; + + for (i = 0; i < 6; ++i) + val &= mac[i]; + + return val == 0xff; +} + struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) { struct mlx4_ah *ah; + struct ibv_port_attr port_attr; ah = malloc(sizeof *ah); if (!ah) @@ -642,7 +654,24 @@ struct ibv_ah *mlx4_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) memcpy(ah->av.dgid, attr->grh.dgid.raw, 16); } + if (ibv_query_port(pd->context, attr->port_num, &port_attr)) + goto err; + + if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) { + if (ibv_cmd_get_mac(pd, attr->port_num, ah->av.dgid, ah->mac)) + goto err; + + ah->vlan = 0; + if 
(mcast_mac(ah->mac)) + ah->av.dlid = htons(0xc000); + + } + + return &ah->ibv_ah; +err: + free(ah); + return NULL; } int mlx4_destroy_ah(struct ibv_ah *ah) diff --git a/src/wqe.h b/src/wqe.h index 6f7f309..bbd22ba 100644 --- a/src/wqe.h +++ b/src/wqe.h @@ -78,7 +78,8 @@ struct mlx4_wqe_datagram_seg { uint32_t av[8]; uint32_t dqpn; uint32_t qkey; - uint32_t reserved[2]; + uint16_t vlan; + uint8_t mac[6]; }; struct mlx4_wqe_data_seg { kernel driver: diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 012aadf..d592bd2 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -452,7 +452,8 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, resp.active_width = attr.active_width; resp.active_speed = attr.active_speed; resp.phys_state = attr.phys_state; - resp.transport = attr.transport; + resp.transport = attr.transport == RDMA_TRANSPORT_RDMAOE ? + IB_LINK_LAYER_ETHERNET : IB_LINK_LAYER_INFINIBAND; if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index bf6e860..57653b7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -73,6 +73,12 @@ enum rdma_transport_type { RDMA_TRANSPORT_RDMAOE }; +enum { + IB_LINK_LAYER_UNSPECIFIED, + IB_LINK_LAYER_INFINIBAND, + IB_LINK_LAYER_ETHERNET, +}; + enum ib_device_cap_flags { IB_DEVICE_RESIZE_MAX_WR = 1, IB_DEVICE_BAD_PKEY_CNTR = (1<<1),