From patchwork Thu Jul 11 15:04:23 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yishai Hadas X-Patchwork-Id: 2826507 Return-Path: X-Original-To: patchwork-linux-rdma@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.19.201]) by patchwork1.web.kernel.org (Postfix) with ESMTP id 7BFDD9F7D6 for ; Thu, 11 Jul 2013 16:05:02 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 007F220286 for ; Thu, 11 Jul 2013 16:05:00 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id AF2C82028A for ; Thu, 11 Jul 2013 16:04:57 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932434Ab3GKQEw (ORCPT ); Thu, 11 Jul 2013 12:04:52 -0400 Received: from mailp.voltaire.com ([193.47.165.129]:54812 "EHLO mellanox.co.il" rhost-flags-OK-FAIL-OK-FAIL) by vger.kernel.org with ESMTP id S932102Ab3GKQEv (ORCPT ); Thu, 11 Jul 2013 12:04:51 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from yishaih@mellanox.com) with SMTP; 11 Jul 2013 19:04:47 +0300 Received: from vnc17.lab.mtl.com (vnc17.mtl.labs.mlnx [10.7.2.17]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id r6BG4iV4019179; Thu, 11 Jul 2013 19:04:44 +0300 Received: from vnc17.lab.mtl.com (localhost.localdomain [127.0.0.1]) by vnc17.lab.mtl.com (8.13.8/8.13.8) with ESMTP id r6BF5sjG001563; Thu, 11 Jul 2013 18:05:54 +0300 Received: (from yishaih@localhost) by vnc17.lab.mtl.com (8.13.8/8.13.8/Submit) id r6BF5swP001562; Thu, 11 Jul 2013 18:05:54 +0300 From: Yishai Hadas To: linux-rdma@vger.kernel.org, roland@purestorage.com Cc: tzahio@mellanox.com, yishaih@mellanox.com, Jay Sternberg , Sean Hefty Subject: [PATCH V7 libibverbs 7/7] Add XRC sample application Date: Thu, 11 Jul 2013 18:04:23 +0300 Message-Id: <1373555063-31790-8-git-send-email-yishaih@mellanox.com> X-Mailer: git-send-email 1.7.8.2 In-Reply-To: <1373555063-31790-1-git-send-email-yishaih@mellanox.com> References: <1373555063-31790-1-git-send-email-yishaih@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Spam-Status: No, score=-7.2 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, T_FRT_STOCK2, UNPARSEABLE_RELAY autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Jay Sternberg Signed-off-by: Jay Sternberg Signed-off-by: Sean Hefty Signed-off-by: Yishai Hadas --- Changes from V6: MSG_SIZE - fixed size to be 31. open("/tmp/xrc_domain ...) - added missing parameters to fix compilation. on some platforms. ibv_get_srq_num - adapted to new API. remote_srqn - adapted to be set on ibv_send_wr struct. Makefile.am | 4 +- examples/xsrq_pingpong.c | 890 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 893 insertions(+), 1 deletions(-) create mode 100644 examples/xsrq_pingpong.c diff --git a/Makefile.am b/Makefile.am index aa6097a..a6767de 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,7 +18,7 @@ src_libibverbs_la_DEPENDENCIES = $(srcdir)/src/libibverbs.map bin_PROGRAMS = examples/ibv_devices examples/ibv_devinfo \ examples/ibv_asyncwatch examples/ibv_rc_pingpong examples/ibv_uc_pingpong \ - examples/ibv_ud_pingpong examples/ibv_srq_pingpong + examples/ibv_ud_pingpong examples/ibv_srq_pingpong examples/ibv_xsrq_pingpong examples_ibv_devices_SOURCES = examples/device_list.c examples_ibv_devices_LDADD = $(top_builddir)/src/libibverbs.la examples_ibv_devinfo_SOURCES = examples/devinfo.c @@ -31,6 +31,8 @@ examples_ibv_ud_pingpong_SOURCES = examples/ud_pingpong.c examples/pingpong.c examples_ibv_ud_pingpong_LDADD = $(top_builddir)/src/libibverbs.la examples_ibv_srq_pingpong_SOURCES = examples/srq_pingpong.c examples/pingpong.c examples_ibv_srq_pingpong_LDADD = $(top_builddir)/src/libibverbs.la +examples_ibv_xsrq_pingpong_SOURCES = examples/xsrq_pingpong.c examples/pingpong.c +examples_ibv_xsrq_pingpong_LDADD = $(top_builddir)/src/libibverbs.la examples_ibv_asyncwatch_SOURCES = examples/asyncwatch.c examples_ibv_asyncwatch_LDADD = $(top_builddir)/src/libibverbs.la diff --git a/examples/xsrq_pingpong.c b/examples/xsrq_pingpong.c new file mode 100644 index 0000000..b5fc1b6 --- /dev/null +++ b/examples/xsrq_pingpong.c @@ -0,0 +1,890 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2011 Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if HAVE_CONFIG_H +# include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pingpong.h" + +#define MSG_FORMAT "%04x:%06x:%06x:%06x:%04x" +#define MSG_SIZE 31 +#define MSG_SSCAN "%x:%x:%x:%x:%x" +#define ADDR_FORMAT \ + "%8s: LID %04x, QPN RECV %06x SEND %06x, PSN %06x, SRQN %04x\n" +static int page_size; + +struct pingpong_dest { + int lid; + int recv_qpn; + int send_qpn; + int recv_psn; + int send_psn; + int srqn; + int pp_cnt; +}; + +struct pingpong_context { + struct ibv_context *context; + struct ibv_comp_channel *channel; + struct ibv_pd *pd; + struct ibv_mr *mr; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; + struct ibv_srq *srq; + struct ibv_xrcd *xrcd; + struct ibv_qp **recv_qp; + struct ibv_qp **send_qp; + struct pingpong_dest *rem_dest; + void *buf; + int lid; + int sl; + enum ibv_mtu mtu; + int ib_port; + int fd; + int size; + int num_clients; + int num_tests; + int use_event; +}; + +struct pingpong_context ctx; + + +static int open_device(char *ib_devname) +{ + struct ibv_device **dev_list; + int i = 0; + + dev_list = ibv_get_device_list(NULL); + if (!dev_list) { + fprintf(stderr, "Failed to get IB devices list"); + return -1; + } + + if (ib_devname) { + for (; dev_list[i]; ++i) { + if (!strcmp(ibv_get_device_name(dev_list[i]), ib_devname)) + break; + } + } + if (!dev_list[i]) { + fprintf(stderr, "IB device %s not found\n", + ib_devname ? ib_devname : ""); + return -1; + } + + ctx.context = ibv_open_device(dev_list[i]); + if (!ctx.context) { + fprintf(stderr, "Couldn't get context for %s\n", + ibv_get_device_name(dev_list[i])); + return -1; + } + + ibv_free_device_list(dev_list); + return 0; +} + +static int create_qps(void) +{ + struct ibv_qp_init_attr_ex init; + struct ibv_qp_attr mod; + int i; + + for (i = 0; i < ctx.num_clients; ++i) { + + memset(&init, 0, sizeof init); + init.qp_type = IBV_QPT_XRC_RECV; + init.comp_mask = IBV_QP_INIT_ATTR_XRCD; + init.xrcd = ctx.xrcd; + + ctx.recv_qp[i] = ibv_create_qp_ex(ctx.context, &init); + if (!ctx.recv_qp[i]) { + fprintf(stderr, "Couldn't create recv QP[%d] errno %d\n", + i, errno); + return 1; + } + + mod.qp_state = IBV_QPS_INIT; + mod.pkey_index = 0; + mod.port_num = ctx.ib_port; + mod.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ; + + if (ibv_modify_qp(ctx.recv_qp[i], &mod, + IBV_QP_STATE | IBV_QP_PKEY_INDEX | + IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { + fprintf(stderr, "Failed to modify recv QP[%d] to INIT\n", i); + return 1; + } + + memset(&init, 0, sizeof init); + init.qp_type = IBV_QPT_XRC_SEND; + init.send_cq = ctx.send_cq; + init.cap.max_send_wr = ctx.num_clients * ctx.num_tests; + init.cap.max_send_sge = 1; + init.comp_mask = IBV_QP_INIT_ATTR_PD; + init.pd = ctx.pd; + + ctx.send_qp[i] = ibv_create_qp_ex(ctx.context, &init); + if (!ctx.send_qp[i]) { + fprintf(stderr, "Couldn't create send QP[%d] errno %d\n", + i, errno); + return 1; + } + + mod.qp_state = IBV_QPS_INIT; + mod.pkey_index = 0; + mod.port_num = ctx.ib_port; + mod.qp_access_flags = 0; + + if (ibv_modify_qp(ctx.send_qp[i], &mod, + IBV_QP_STATE | IBV_QP_PKEY_INDEX | + IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)) { + fprintf(stderr, "Failed to modify send QP[%d] to INIT\n", i); + return 1; + } + } + + return 0; +} + +static int pp_init_ctx(char *ib_devname) +{ + struct ibv_srq_init_attr_ex attr; + struct ibv_xrcd_init_attr xrcd_attr; + + ctx.recv_qp = calloc(ctx.num_clients, sizeof *ctx.recv_qp); + ctx.send_qp = calloc(ctx.num_clients, sizeof *ctx.send_qp); + ctx.rem_dest = calloc(ctx.num_clients, sizeof *ctx.rem_dest); + if (!ctx.recv_qp || !ctx.send_qp || !ctx.rem_dest) + return 1; + + if (open_device(ib_devname)) { + fprintf(stderr, "Failed to open device\n"); + return 1; + } + + ctx.lid = pp_get_local_lid(ctx.context, ctx.ib_port); + if (ctx.lid < 0) { + fprintf(stderr, "Failed to get SLID\n"); + return 1; + } + + ctx.buf = memalign(page_size, ctx.size); + if (!ctx.buf) { + fprintf(stderr, "Couldn't allocate work buf.\n"); + return 1; + } + + memset(ctx.buf, 0, ctx.size); + + if (ctx.use_event) { + ctx.channel = ibv_create_comp_channel(ctx.context); + if (!ctx.channel) { + fprintf(stderr, "Couldn't create completion channel\n"); + return 1; + } + } + + ctx.pd = ibv_alloc_pd(ctx.context); + if (!ctx.pd) { + fprintf(stderr, "Couldn't allocate PD\n"); + return 1; + } + + ctx.mr = ibv_reg_mr(ctx.pd, ctx.buf, ctx.size, IBV_ACCESS_LOCAL_WRITE); + if (!ctx.mr) { + fprintf(stderr, "Couldn't register MR\n"); + return 1; + } + + ctx.fd = open("/tmp/xrc_domain", O_RDONLY | O_CREAT, S_IRUSR | S_IRGRP); + if (ctx.fd < 0) { + fprintf(stderr, + "Couldn't create the file for the XRC Domain " + "but not stopping %d\n", errno); + ctx.fd = -1; + } + + memset(&xrcd_attr, 0, sizeof xrcd_attr); + xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS; + xrcd_attr.fd = ctx.fd; + xrcd_attr.oflags = O_CREAT; + ctx.xrcd = ibv_open_xrcd(ctx.context, &xrcd_attr); + if (!ctx.xrcd) { + fprintf(stderr, "Couldn't Open the XRC Domain %d\n", errno); + return 1; + } + + ctx.recv_cq = ibv_create_cq(ctx.context, ctx.num_clients, &ctx.recv_cq, + ctx.channel, 0); + if (!ctx.recv_cq) { + fprintf(stderr, "Couldn't create recv CQ\n"); + return 1; + } + + if (ctx.use_event) { + if (ibv_req_notify_cq(ctx.recv_cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + } + + ctx.send_cq = ibv_create_cq(ctx.context, ctx.num_clients, NULL, NULL, 0); + if (!ctx.send_cq) { + fprintf(stderr, "Couldn't create send CQ\n"); + return 1; + } + + memset(&attr, 0, sizeof attr); + attr.attr.max_wr = ctx.num_clients; + attr.attr.max_sge = 1; + attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD | + IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD; + attr.srq_type = IBV_SRQT_XRC; + attr.xrcd = ctx.xrcd; + attr.cq = ctx.recv_cq; + attr.pd = ctx.pd; + + ctx.srq = ibv_create_srq_ex(ctx.context, &attr); + if (!ctx.srq) { + fprintf(stderr, "Couldn't create SRQ\n"); + return 1; + } + + if (create_qps()) + return 1; + + return 0; +} + +static int send_local_dest(int sockfd, int index) +{ + char msg[MSG_SIZE]; + uint32_t srq_num; + + ctx.rem_dest[index].recv_psn = lrand48() & 0xffffff; + if (ibv_get_srq_num(ctx.srq, &srq_num)) { + fprintf(stderr, "Couldn't get SRQ num\n"); + return -1; + } + + printf(ADDR_FORMAT, "local", ctx.lid, ctx.recv_qp[index]->qp_num, + ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn, + srq_num); + + sprintf(msg, MSG_FORMAT, ctx.lid, ctx.recv_qp[index]->qp_num, + ctx.send_qp[index]->qp_num, ctx.rem_dest[index].recv_psn, + srq_num); + + if (write(sockfd, msg, MSG_SIZE) != MSG_SIZE) { + fprintf(stderr, "Couldn't send local address\n"); + return -1; + } + + return 0; +} + +static int recv_remote_dest(int sockfd, int index) +{ + struct pingpong_dest *rem_dest; + char msg[MSG_SIZE]; + int n = 0, r; + + while (n < MSG_SIZE) { + r = read(sockfd, msg + n, MSG_SIZE - n); + if (r < 0) { + perror("client read"); + fprintf(stderr, + "%d/%d: Couldn't read remote address [%d]\n", + n, MSG_SIZE, index); + return -1; + } + n += r; + } + + rem_dest = &ctx.rem_dest[index]; + sscanf(msg, MSG_SSCAN, &rem_dest->lid, &rem_dest->recv_qpn, + &rem_dest->send_qpn, &rem_dest->send_psn, &rem_dest->srqn); + + printf(ADDR_FORMAT, "remote", rem_dest->lid, rem_dest->recv_qpn, + rem_dest->send_qpn, rem_dest->send_psn, rem_dest->srqn); + + return 0; +} + +static int connect_qps(int index) +{ + struct ibv_qp_attr attr; + + memset(&attr, 0, sizeof attr); + attr.qp_state = IBV_QPS_RTR; + attr.dest_qp_num = ctx.rem_dest[index].send_qpn; + attr.path_mtu = ctx.mtu; + attr.rq_psn = ctx.rem_dest[index].send_psn; + attr.min_rnr_timer = 12; + attr.ah_attr.dlid = ctx.rem_dest[index].lid; + attr.ah_attr.sl = ctx.sl; + attr.ah_attr.port_num = ctx.ib_port; + + if (ibv_modify_qp(ctx.recv_qp[index], &attr, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { + fprintf(stderr, "Failed to modify send QP[%d] to RTR\n", index); + return 1; + } + + memset(&attr, 0, sizeof attr); + attr.qp_state = IBV_QPS_RTS; + attr.timeout = 14; + attr.sq_psn = ctx.rem_dest[index].recv_psn; + + if (ibv_modify_qp(ctx.recv_qp[index], &attr, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN)) { + fprintf(stderr, "Failed to modify recv QP[%d] to RTS\n", index); + return 1; + } + + memset(&attr, 0, sizeof attr); + attr.qp_state = IBV_QPS_RTR; + attr.dest_qp_num = ctx.rem_dest[index].recv_qpn; + attr.path_mtu = ctx.mtu; + attr.rq_psn = ctx.rem_dest[index].send_psn; + attr.ah_attr.dlid = ctx.rem_dest[index].lid; + attr.ah_attr.sl = ctx.sl; + attr.ah_attr.port_num = ctx.ib_port; + + if (ibv_modify_qp(ctx.send_qp[index], &attr, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN)) { + fprintf(stderr, "Failed to modify send QP[%d] to RTR\n", index); + return 1; + } + + memset(&attr, 0, sizeof attr); + attr.qp_state = IBV_QPS_RTS; + attr.timeout = 14; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.sq_psn = ctx.rem_dest[index].recv_psn; + + if (ibv_modify_qp(ctx.send_qp[index], &attr, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_SQ_PSN | + IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY | IBV_QP_MAX_QP_RD_ATOMIC)) { + fprintf(stderr, "Failed to modify send QP[%d] to RTS\n", index); + return 1; + } + + return 0; +} + +static int pp_client_connect(const char *servername, int port) +{ + struct addrinfo *res, *t; + char *service; + int ret; + int sockfd = -1; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + + if (asprintf(&service, "%d", port) < 0) + return 1; + + ret = getaddrinfo(servername, service, &hints, &res); + if (ret < 0) { + fprintf(stderr, "%s for %s:%d\n", gai_strerror(ret), servername, port); + free(service); + return 1; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + if (!connect(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port); + return 1; + } + + if (send_local_dest(sockfd, 0)) + return 1; + + if (recv_remote_dest(sockfd, 0)) + return 1; + + if (connect_qps(0)) + return 1; + + shutdown(sockfd, SHUT_RDWR); + close(sockfd); + return 0; +} + +static int pp_server_connect(int port) +{ + struct addrinfo *res, *t; + char *service; + int ret, i, n; + int sockfd = -1, connfd; + struct addrinfo hints = { + .ai_flags = AI_PASSIVE, + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_STREAM + }; + + if (asprintf(&service, "%d", port) < 0) + return 1; + + ret = getaddrinfo(NULL, service, &hints, &res); + if (ret < 0) { + fprintf(stderr, "%s for port %d\n", gai_strerror(ret), port); + free(service); + return 1; + } + + for (t = res; t; t = t->ai_next) { + sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol); + if (sockfd >= 0) { + n = 1; + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n); + if (!bind(sockfd, t->ai_addr, t->ai_addrlen)) + break; + close(sockfd); + sockfd = -1; + } + } + + freeaddrinfo(res); + free(service); + + if (sockfd < 0) { + fprintf(stderr, "Couldn't listen to port %d\n", port); + return 1; + } + + listen(sockfd, ctx.num_clients); + + for (i = 0; i < ctx.num_clients; i++) { + connfd = accept(sockfd, NULL, 0); + if (connfd < 0) { + fprintf(stderr, "accept() failed for client %d\n", i); + return 1; + } + + if (recv_remote_dest(connfd, i)) + return 1; + + if (send_local_dest(connfd, i)) + return 1; + + if (connect_qps(i)) + return 1; + + shutdown(connfd, SHUT_RDWR); + close(connfd); + } + + close(sockfd); + return 0; +} + + +static int pp_close_ctx(void) +{ + int i; + + for (i = 0; i < ctx.num_clients; ++i) { + /* + * Receive QPs may be used by other clients. Leave them, they + * will be cleaned up when the xrcd is destroyed. + */ + if (ibv_destroy_qp(ctx.send_qp[i])) { + fprintf(stderr, "Couldn't destroy INI QP[%d]\n", i); + return 1; + } + } + + if (ibv_destroy_srq(ctx.srq)) { + fprintf(stderr, "Couldn't destroy SRQ\n"); + return 1; + } + + if (!ctx.xrcd && ibv_close_xrcd(ctx.xrcd)) { + fprintf(stderr, "Couldn't close the XRC Domain\n"); + return 1; + } + if (ctx.fd >= 0 && close(ctx.fd)) { + fprintf(stderr, "Couldn't close the file for the XRC Domain\n"); + return 1; + } + + if (ibv_destroy_cq(ctx.send_cq)) { + fprintf(stderr, "Couldn't destroy send CQ\n"); + return 1; + } + + if (ibv_destroy_cq(ctx.recv_cq)) { + fprintf(stderr, "Couldn't destroy recv CQ\n"); + return 1; + } + + if (ibv_dereg_mr(ctx.mr)) { + fprintf(stderr, "Couldn't deregister MR\n"); + return 1; + } + + if (ibv_dealloc_pd(ctx.pd)) { + fprintf(stderr, "Couldn't deallocate PD\n"); + return 1; + } + + if (ctx.channel) { + if (ibv_destroy_comp_channel(ctx.channel)) { + fprintf(stderr, + "Couldn't destroy completion channel\n"); + return 1; + } + } + + if (ibv_close_device(ctx.context)) { + fprintf(stderr, "Couldn't release context\n"); + return 1; + } + + free(ctx.buf); + free(ctx.rem_dest); + free(ctx.send_qp); + free(ctx.recv_qp); + return 0; +} + +static int pp_post_recv(int cnt) +{ + struct ibv_sge sge; + struct ibv_recv_wr wr, *bad_wr; + + sge.addr = (uintptr_t) ctx.buf; + sge.length = ctx.size; + sge.lkey = ctx.mr->lkey; + + wr.next = NULL; + wr.wr_id = (uintptr_t) &ctx; + wr.sg_list = &sge; + wr.num_sge = 1; + + while (cnt--) { + if (ibv_post_srq_recv(ctx.srq, &wr, &bad_wr)) { + fprintf(stderr, "Failed to post receive to SRQ\n"); + return 1; + } + } + return 0; +} + +/* + * Send to each client round robin on each set of xrc send/recv qp. + * Generate a completion on the last send. + */ +static int pp_post_send(int index) +{ + struct ibv_sge sge; + struct ibv_send_wr wr, *bad_wr; + int qpi; + + sge.addr = (uintptr_t) ctx.buf; + sge.length = ctx.size; + sge.lkey = ctx.mr->lkey; + + wr.wr_id = (uintptr_t) index; + wr.next = NULL; + wr.sg_list = &sge; + wr.num_sge = 1; + wr.opcode = IBV_WR_SEND; + wr.qp_type.xrc.remote_srqn = ctx.rem_dest[index].srqn; + + qpi = (index + ctx.rem_dest[index].pp_cnt) % ctx.num_clients; + wr.send_flags = (++ctx.rem_dest[index].pp_cnt >= ctx.num_tests) ? + IBV_SEND_SIGNALED : 0; + + return ibv_post_send(ctx.send_qp[qpi], &wr, &bad_wr); +} + +static int find_qp(int qpn) +{ + int i; + + if (ctx.num_clients == 1) + return 0; + + for (i = 0; i < ctx.num_clients; ++i) + if (ctx.recv_qp[i]->qp_num == qpn) + return i; + + fprintf(stderr, "Unable to find qp %x\n", qpn); + return 0; +} + +static int get_cq_event(void) +{ + struct ibv_cq *ev_cq; + void *ev_ctx; + + if (ibv_get_cq_event(ctx.channel, &ev_cq, &ev_ctx)) { + fprintf(stderr, "Failed to get cq_event\n"); + return 1; + } + + if (ev_cq != ctx.recv_cq) { + fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); + return 1; + } + + if (ibv_req_notify_cq(ctx.recv_cq, 0)) { + fprintf(stderr, "Couldn't request CQ notification\n"); + return 1; + } + + return 0; +} + +static void init(void) +{ + srand48(getpid() * time(NULL)); + + ctx.size = 4096; + ctx.ib_port = 1; + ctx.num_clients = 1; + ctx.num_tests = 5; + ctx.mtu = IBV_MTU_2048; + ctx.sl = 0; +} + +static void usage(const char *argv0) +{ + printf("Usage:\n"); + printf(" %s start a server and wait for connection\n", argv0); + printf(" %s connect to server at \n", argv0); + printf("\n"); + printf("Options:\n"); + printf(" -p, --port= listen on/connect to port (default 18515)\n"); + printf(" -d, --ib-dev= use IB device (default first device found)\n"); + printf(" -i, --ib-port= use port of IB device (default 1)\n"); + printf(" -s, --size= size of message to exchange (default 4096)\n"); + printf(" -m, --mtu= path MTU (default 2048)\n"); + printf(" -c, --clients= number of clients (on server only, default 1)\n"); + printf(" -n, --num_tests= number of tests per client (default 5)\n"); + printf(" -l, --sl= service level value\n"); + printf(" -e, --events sleep on CQ events (default poll)\n"); +} + +int main(int argc, char *argv[]) +{ + char *ib_devname = NULL; + char *servername = NULL; + int port = 18515; + int i, total, cnt = 0; + int ne, qpi, num_cq_events = 0; + struct ibv_wc wc; + + init(); + while (1) { + int c; + + static struct option long_options[] = { + { .name = "port", .has_arg = 1, .val = 'p' }, + { .name = "ib-dev", .has_arg = 1, .val = 'd' }, + { .name = "ib-port", .has_arg = 1, .val = 'i' }, + { .name = "size", .has_arg = 1, .val = 's' }, + { .name = "mtu", .has_arg = 1, .val = 'm' }, + { .name = "clients", .has_arg = 1, .val = 'c' }, + { .name = "num_tests", .has_arg = 1, .val = 'n' }, + { .name = "sl", .has_arg = 1, .val = 'l' }, + { .name = "events", .has_arg = 0, .val = 'e' }, + { 0 } + }; + + c = getopt_long(argc, argv, "p:d:i:s:m:c:n:l:e", long_options, + NULL); + if (c == -1) + break; + + switch (c) { + case 'p': + port = strtol(optarg, NULL, 0); + if (port < 0 || port > 65535) { + usage(argv[0]); + return 1; + } + break; + case 'd': + ib_devname = strdupa(optarg); + break; + case 'i': + ctx.ib_port = strtol(optarg, NULL, 0); + if (ctx.ib_port < 0) { + usage(argv[0]); + return 1; + } + break; + case 's': + ctx.size = strtol(optarg, NULL, 0); + break; + case 'm': + ctx.mtu = pp_mtu_to_enum(strtol(optarg, NULL, 0)); + if (ctx.mtu < 0) { + usage(argv[0]); + return 1; + } + break; + case 'c': + ctx.num_clients = strtol(optarg, NULL, 0); + break; + case 'n': + ctx.num_tests = strtol(optarg, NULL, 0); + break; + case 'l': + ctx.sl = strtol(optarg, NULL, 0); + break; + case 'e': + ctx.use_event = 1; + break; + default: + usage(argv[0]); + return 1; + } + } + + if (optind == argc - 1) { + servername = strdupa(argv[optind]); + ctx.num_clients = 1; + } else if (optind < argc) { + usage(argv[0]); + return 1; + } + + page_size = sysconf(_SC_PAGESIZE); + + if (pp_init_ctx(ib_devname)) + return 1; + + if (pp_post_recv(ctx.num_clients)) { + fprintf(stderr, "Couldn't post receives\n"); + return 1; + } + + if (servername) { + if (pp_client_connect(servername, port)) + return 1; + } else { + if (pp_server_connect(port)) + return 1; + + for (i = 0; i < ctx.num_clients; i++) + pp_post_send(i); + } + + total = ctx.num_clients * ctx.num_tests; + while (cnt < total) { + if (ctx.use_event) { + if (get_cq_event()) + return 1; + + ++num_cq_events; + } + + do { + ne = ibv_poll_cq(ctx.recv_cq, 1, &wc); + if (ne < 0) { + fprintf(stderr, "Error polling cq %d\n", ne); + return 1; + } else if (ne == 0) { + break; + } + + if (wc.status) { + fprintf(stderr, "Work completion error %d\n", wc.status); + return 1; + } + + pp_post_recv(ne); + qpi = find_qp(wc.qp_num); + if (ctx.rem_dest[qpi].pp_cnt < ctx.num_tests) + pp_post_send(qpi); + cnt += ne; + } while (ne > 0); + } + + for (cnt = 0; cnt < ctx.num_clients; cnt += ne) { + ne = ibv_poll_cq(ctx.send_cq, 1, &wc); + if (ne < 0) { + fprintf(stderr, "Error polling cq %d\n", ne); + return 1; + } + } + + if (ctx.use_event) + ibv_ack_cq_events(ctx.recv_cq, num_cq_events); + + if (pp_close_ctx()) + return 1; + + printf("success\n"); + return 0; +}