new file mode 100644
@@ -0,0 +1,514 @@
+/*
+ * InfiniBand Transport Layer
+ *
+ * Copyright (c) 2014 - 2017 ProfitBricks GmbH. All rights reserved.
+ * Authors: Fabian Holler < mail@fholler.de>
+ * Jack Wang <jinpu.wang@profitbricks.com>
+ * Kleber Souza <kleber.souza@profitbricks.com>
+ * Danil Kipnis <danil.kipnis@profitbricks.com>
+ * Roman Pen <roman.penyaev@profitbricks.com>
+ * Milind Dumbare <Milind.dumbare@gmail.com>
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ */
+
+#ifndef __IBTRS_H
+#define __IBTRS_H
+
+#include <linux/dma-direction.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/math64.h>
+#include <linux/time.h>
+#include <linux/timekeeping.h>
+#include <linux/types.h>
+#include <linux/uio.h>
+#include <linux/uuid.h>
+#include <rdma/ib_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+
+/* NOTE(review): presumably the default port the IBTRS server listens on - confirm */
+#define IBTRS_SERVER_PORT 1234
+/* NOTE(review): presumably the number of work completions polled per batch - confirm */
+#define WC_ARRAY_SIZE 16
+#define IB_APM_TIMEOUT 16 /* 4.096 usec * 2^16 ~= 268 msec */
+
+/* NOTE(review): presumably the number of user messages per user connection - confirm */
+#define USR_MSG_CNT 64
+#define USR_CON_BUF_SIZE (USR_MSG_CNT * 2) /* double bufs for ACK's */
+
+/* Heartbeat timing; values are in milliseconds unless converted below. */
+#define DEFAULT_HEARTBEAT_TIMEOUT_MS 20000
+#define MIN_HEARTBEAT_TIMEOUT_MS 5000
+#define HEARTBEAT_INTV_MS 500
+/* HEARTBEAT_INTV_MS converted to jiffies; evaluated at every use. */
+#define HEARTBEAT_INTV_JIFFIES msecs_to_jiffies(HEARTBEAT_INTV_MS)
+
+/* NOTE(review): presumably the allowed range for a retry counter - confirm */
+#define MIN_RTR_CNT 1
+#define MAX_RTR_CNT 7
+
+/*
+ * With the current size of the tag allocated on the client, 4K is the maximum
+ * number of tags we can allocate. (see IBNBD-2321)
+ * This number is also used on the client to allocate the IU for the user
+ * connection to receive the RDMA addresses from the server.
+ */
+#define MAX_SESS_QUEUE_DEPTH 4096
+
+/*
+ * Helper for switch-based enum-to-string functions: expands to a case label
+ * that returns the name of the enumerator as a string literal. Only valid
+ * inside a switch statement of a function returning a string pointer.
+ */
+#define XX(a) case (a): return #a
+
+/*
+ * Buffer size needed for the longest textual address in the form shown in
+ * the literal below; sizeof on a string literal counts the terminating NUL.
+ */
+#define IBTRS_ADDRLEN sizeof("ipv6:[xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx]")
+
+/**
+ * ib_wc_opcode_str() - Name of a work completion opcode.
+ * @opcode: work completion opcode to translate
+ *
+ * Returns the identifier of the enumerator as a string literal, or
+ * "IB_WC_OPCODE_UNKNOWN" for every opcode that is not listed below.
+ */
+static inline const char *ib_wc_opcode_str(enum ib_wc_opcode opcode)
+{
+	const char *name = "IB_WC_OPCODE_UNKNOWN";
+
+	switch (opcode) {
+	case IB_WC_SEND:
+		name = "IB_WC_SEND";
+		break;
+	case IB_WC_RDMA_WRITE:
+		name = "IB_WC_RDMA_WRITE";
+		break;
+	case IB_WC_RDMA_READ:
+		name = "IB_WC_RDMA_READ";
+		break;
+	case IB_WC_COMP_SWAP:
+		name = "IB_WC_COMP_SWAP";
+		break;
+	case IB_WC_FETCH_ADD:
+		name = "IB_WC_FETCH_ADD";
+		break;
+	/* receive side: inbound completions */
+	case IB_WC_RECV:
+		name = "IB_WC_RECV";
+		break;
+	case IB_WC_RECV_RDMA_WITH_IMM:
+		name = "IB_WC_RECV_RDMA_WITH_IMM";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+
+/**
+ * struct ib_session - IB verbs objects grouped per session
+ * @pd: protection domain
+ * @mr: memory region
+ * @event_handler: IB event handler
+ *
+ * NOTE(review): set up by ib_session_init() and released by
+ * ib_session_destroy() judging by the prototypes in this header - confirm.
+ */
+struct ib_session {
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ struct ib_event_handler event_handler;
+};
+
+/**
+ * struct ibtrs_ib_path - pair of GIDs describing one path
+ * @p_sgid: NOTE(review): presumably the source GID - confirm
+ * @p_dgid: NOTE(review): presumably the destination GID - confirm
+ */
+struct ibtrs_ib_path {
+ union ib_gid p_sgid;
+ union ib_gid p_dgid;
+};
+
+/**
+ * struct ib_con - state of a single IB connection
+ * @qp: queue pair; placed on its own cache line
+ * @cq: completion queue; placed on its own cache line
+ * @beacon: send work request; NOTE(review): presumably the one posted by
+ *          post_beacon() - confirm
+ * @cm_id: RDMA CM identifier of the connection
+ * @pri_path: NOTE(review): presumably the primary path - confirm
+ * @cur_path: NOTE(review): presumably the path currently in use - confirm
+ * @addr: address string; ownership is not visible from this header
+ * @hostname: host name string; ownership is not visible from this header
+ */
+struct ib_con {
+ struct ib_qp *qp ____cacheline_aligned;
+ struct ib_cq *cq ____cacheline_aligned;
+ struct ib_send_wr beacon;
+ struct rdma_cm_id *cm_id;
+ struct ibtrs_ib_path pri_path;
+ struct ibtrs_ib_path cur_path;
+ char *addr;
+ char *hostname;
+};
+
+/**
+ * struct ibtrs_iu - information unit: a buffer together with its DMA mapping
+ * @list: list linkage; ibtrs_iu_put() and ibtrs_iu_get() operate on a
+ *        list_head of IUs
+ * @dma_addr: DMA address; NOTE(review): presumably the mapping of @buf -
+ *            confirm
+ * @buf: buffer pointer
+ * @size: size value; NOTE(review): presumably the size of @buf in bytes -
+ *        confirm
+ * @direction: DMA data direction
+ * @is_msg: flag passed in through ibtrs_iu_alloc(); exact meaning is not
+ *          visible from this header
+ * @tag: tag passed in through ibtrs_iu_alloc()
+ */
+struct ibtrs_iu {
+ struct list_head list;
+ dma_addr_t dma_addr;
+ void *buf;
+ size_t size;
+ enum dma_data_direction direction;
+ bool is_msg;
+ u32 tag;
+};
+
+/**
+ * struct ibtrs_heartbeat - heartbeat bookkeeping
+ * @send_ts_ms: NOTE(review): presumably the timestamp of the last sent
+ *              heartbeat, in milliseconds - confirm
+ * @recv_ts_ms: NOTE(review): presumably the timestamp of the last received
+ *              heartbeat, in milliseconds - confirm
+ * @timeout_ms: heartbeat timeout, see ibtrs_set_heartbeat_timeout()
+ * @warn_timeout_ms: NOTE(review): presumably the threshold after which
+ *                   ibtrs_heartbeat_warn() reports - confirm
+ * @addr: address string; ownership is not visible from this header
+ * @hostname: host name string; ownership is not visible from this header
+ */
+struct ibtrs_heartbeat {
+ atomic64_t send_ts_ms;
+ atomic64_t recv_ts_ms;
+ u32 timeout_ms;
+ u32 warn_timeout_ms;
+ char *addr;
+ char *hostname;
+};
+
+/* IBTRS protocol version, carried in the "ver" field of session messages. */
+#define IBTRS_VERSION 2
+/* Length in bytes of the uuid arrays embedded in the messages below. */
+#define IBTRS_UUID_SIZE 16
+/* NOTE(review): meaning of this size is not visible from this header - confirm */
+#define IO_MSG_SIZE 24
+/* NOTE(review): presumably the width in bits of IB immediate data - confirm */
+#define IB_IMM_SIZE_BITS 32
+
+/*
+ * For GCC newer than major version 6, report -Wpadded as a warning for the
+ * message definitions that follow, so that implicit padding inserted by the
+ * compiler is flagged. The state is restored by the matching
+ * "diagnostic pop" further down.
+ */
+#define GCC_DIAGNOSTIC_AWARE ((__GNUC__ > 6))
+#if GCC_DIAGNOSTIC_AWARE
+#pragma GCC diagnostic push
+#pragma GCC diagnostic warning "-Wpadded"
+#endif
+
+/**
+ * enum ibtrs_msg_types - IBTRS message types. DO NOT REMOVE OR REORDER!!!
+ * @IBTRS_MSG_SESS_OPEN: Client requests new session on Server
+ * @IBTRS_MSG_SESS_OPEN_RESP: Server informs Client about session parameters
+ * @IBTRS_MSG_CON_OPEN: Client requests new connection to server
+ * @IBTRS_MSG_RDMA_WRITE: Client writes data per RDMA to Server
+ * @IBTRS_MSG_REQ_RDMA_WRITE: Client requests data transfer per RDMA
+ * @IBTRS_MSG_USER: Data transfer per Infiniband message
+ * @IBTRS_MSG_ERROR: Fatal Error happened
+ * @IBTRS_MSG_SESS_INFO: Client sends session info to the server
+ *
+ * The enumerators have no explicit values, so their numeric value is their
+ * position in the list; this is why entries must not be removed or
+ * reordered.
+ */
+enum ibtrs_msg_types {
+ IBTRS_MSG_SESS_OPEN,
+ IBTRS_MSG_SESS_OPEN_RESP,
+ IBTRS_MSG_CON_OPEN,
+ IBTRS_MSG_RDMA_WRITE,
+ IBTRS_MSG_REQ_RDMA_WRITE,
+ IBTRS_MSG_USER,
+ IBTRS_MSG_ERROR,
+ IBTRS_MSG_SESS_INFO,
+};
+
+/**
+ * struct ibtrs_msg_hdr - Common header of all IBTRS messages
+ * @__padding1: leading padding byte, see below
+ * @type: Message type, valid values see: enum ibtrs_msg_types
+ * @__padding2: explicit padding that keeps @tsize 4-byte aligned
+ * @tsize: Total size of transferred data
+ *
+ * Don't move the leading padding (@__padding1)! It's a workaround for a
+ * kernel bug. See IBNBD-610 for details.
+ * NOTE(review): the previous wording spoke of "8 padding bytes", but the
+ * struct starts with a single u8 (8 bits) - confirm against IBNBD-610.
+ *
+ * DO NOT CHANGE!
+ */
+struct ibtrs_msg_hdr {
+ u8 __padding1;
+ u8 type;
+ u16 __padding2;
+ u32 tsize;
+};
+
+/* Size in bytes of the common message header. */
+#define IBTRS_HDR_LEN sizeof(struct ibtrs_msg_hdr)
+
+/**
+ * struct ibtrs_msg_sess_open - Opens a new session between client and server
+ * @hdr: message header
+ * @uuid: client host identifier, unique until module reload
+ * @ver: IBTRS protocol version
+ * @con_cnt: number of connections in this session
+ * @reserved: reserved fields for future usage
+ *
+ * NOTE(review): an earlier comment stated that 28 bytes is the maximum for
+ * all IPv6/IPv4 sessions, while @reserved is declared with 30 bytes -
+ * confirm which limit applies.
+ *
+ * DO NOT CHANGE members before ver.
+ */
+struct ibtrs_msg_sess_open {
+ struct ibtrs_msg_hdr hdr;
+ u8 uuid[IBTRS_UUID_SIZE];
+ u8 ver;
+ u8 con_cnt;
+ u8 reserved[30];
+};
+
+/**
+ * struct ibtrs_msg_sess_info - Session information sent by the client
+ * @hdr: message header
+ * @hostname: client host name, at most MAXHOSTNAMELEN bytes
+ */
+struct ibtrs_msg_sess_info {
+ struct ibtrs_msg_hdr hdr;
+ u8 hostname[MAXHOSTNAMELEN];
+};
+
+/* Size in bytes of a complete session info message. */
+#define MSG_SESS_INFO_SIZE sizeof(struct ibtrs_msg_sess_info)
+
+/*
+ * Data Layout in RDMA-Bufs:
+ *
+ * +---------RDMA-BUF--------+
+ * | Slice N |
+ * | +---------------------+ |
+ * | | I/O data | |
+ * | |---------------------| |
+ * | | IBNBD MSG | |
+ * | |---------------------| |
+ * | | IBTRS MSG | |
+ * | +---------------------+ |
+ * +-------------------------+
+ * | Slice N+1 |
+ * | +---------------------+ |
+ * | | I/O data | |
+ * | |---------------------| |
+ * | | IBNBD MSG | |
+ * | |---------------------| |
+ * | | IBTRS MSG | |
+ * | +---------------------+ |
+ * +-------------------------+
+ */
+
+/* Number of reserved bytes in struct ibtrs_msg_sess_open_resp. */
+#define IBTRS_MSG_RESV_LEN 128
+/**
+ * struct ibtrs_msg_sess_open_resp - Servers response to %IBTRS_MSG_SESS_OPEN
+ * @hdr: message header
+ * @ver: IBTRS protocol version
+ * @__padding1: explicit padding byte
+ * @cnt: Number of rdma addresses in this message
+ * @rkey: remote key to allow client to access buffers
+ * @hostname: hostname of local host
+ * @reserved: reserved fields for future usage
+ * @max_inflight_msg: max inflight messages (queue-depth) in this session
+ * @max_io_size: max io size server supports
+ * @max_req_size: max infiniband message size server supports
+ * @addr: rdma addresses of buffers, @cnt entries
+ *
+ * NOTE(review): @max_inflight_msg is a u16 directly followed by a u32, and
+ * @addr requires u64 alignment. Depending on MAXHOSTNAMELEN the compiler
+ * likely inserts implicit padding at these two places - check the -Wpadded
+ * output or pahole before relying on the exact layout.
+ *
+ * DO NOT CHANGE members before ver.
+ */
+struct ibtrs_msg_sess_open_resp {
+ struct ibtrs_msg_hdr hdr;
+ u8 ver;
+ u8 __padding1;
+ u16 cnt;
+ u32 rkey;
+ u8 hostname[MAXHOSTNAMELEN];
+ u8 reserved[IBTRS_MSG_RESV_LEN];
+ u16 max_inflight_msg;
+ u32 max_io_size;
+ u32 max_req_size;
+ u64 addr[];
+};
+
+/*
+ * Size in bytes of a struct ibtrs_msg_sess_open_resp followed by @cnt
+ * trailing u64 addresses. The argument is parenthesized so that expressions
+ * such as "a + b" are multiplied as a whole.
+ */
+#define IBTRS_MSG_SESS_OPEN_RESP_LEN(cnt) \
+	(sizeof(struct ibtrs_msg_sess_open_resp) + sizeof(u64) * (cnt))
+/**
+ * struct ibtrs_msg_con_open - Opens a new connection between client and server
+ * @hdr: message header
+ * @uuid: client host identifier, unique until module reload;
+ *        IBTRS_UUID_SIZE bytes
+ */
+struct ibtrs_msg_con_open {
+ struct ibtrs_msg_hdr hdr;
+ u8 uuid[IBTRS_UUID_SIZE];
+};
+
+/**
+ * struct ibtrs_msg_user - Data exchanged as an Infiniband message
+ * @hdr: message header
+ * @payl: Payload from the user module; flexible array, its length is not
+ *        stored in this struct
+ */
+struct ibtrs_msg_user {
+ struct ibtrs_msg_hdr hdr;
+ u8 payl[];
+};
+
+/**
+ * struct ibtrs_sg_desc - RDMA-Buffer entry description
+ * @addr: Address of RDMA destination buffer
+ * @key: Authorization rkey to write to the buffer
+ * @len: Size of the buffer
+ */
+struct ibtrs_sg_desc {
+ u64 addr;
+ u32 key;
+ u32 len;
+};
+
+/* Size in bytes of one RDMA buffer descriptor. */
+#define IBTRS_SG_DESC_LEN sizeof(struct ibtrs_sg_desc)
+
+/**
+ * struct ibtrs_msg_req_rdma_write - RDMA data transfer request from client
+ * @hdr: message header
+ * @__padding: explicit padding
+ * @sg_cnt: number of @desc entries
+ * @desc: RDMA buffers where the server can write the result to
+ */
+struct ibtrs_msg_req_rdma_write {
+ struct ibtrs_msg_hdr hdr;
+ u32 __padding;
+ u32 sg_cnt;
+ struct ibtrs_sg_desc desc[];
+};
+
+/**
+ * struct ibtrs_msg_rdma_write - Message transferred to server with RDMA-Write
+ * @hdr: message header
+ *
+ * Consists of the common header only.
+ */
+struct ibtrs_msg_rdma_write {
+ struct ibtrs_msg_hdr hdr;
+};
+
+/**
+ * struct ibtrs_msg_error - Error message
+ * @hdr: message header
+ * @errno: Errno number describing the error
+ * @__padding: explicit padding after the 32-bit errno
+ */
+struct ibtrs_msg_error {
+ struct ibtrs_msg_hdr hdr;
+ s32 errno;
+ u32 __padding;
+};
+
+#if GCC_DIAGNOSTIC_AWARE
+#pragma GCC diagnostic pop
+#endif
+
+/*
+ * Message helpers. The implementations are not part of this header; the
+ * notes below are derived from the signatures only.
+ */
+
+/*
+ * Validate the message starting at @hdr.
+ * NOTE(review): @queue_depth is presumably the session queue depth used for
+ * bounds checks, and the result presumably 0 or a negative errno - confirm.
+ */
+int ibtrs_validate_message(u16 queue_depth, const void *hdr);
+
+/* Fill the session open message @msg from @con_cnt and @uuid. */
+void fill_ibtrs_msg_sess_open(struct ibtrs_msg_sess_open *msg, u8 con_cnt,
+ const uuid_le *uuid);
+
+/* Fill the connection open message @msg from @uuid. */
+void fill_ibtrs_msg_con_open(struct ibtrs_msg_con_open *msg,
+ const uuid_le *uuid);
+
+/* Fill the session info message @msg from @hostname. */
+void fill_ibtrs_msg_sess_info(struct ibtrs_msg_sess_info *msg,
+ const char *hostname);
+
+/*
+ * Heartbeat helpers operating on struct ibtrs_heartbeat. The
+ * implementations are not part of this header.
+ * NOTE(review): the per-function remarks are inferred from names and
+ * signatures - confirm against the implementation.
+ */
+
+/* Setters taking a mutable heartbeat: presumably record "now" as timestamp. */
+void ibtrs_heartbeat_set_send_ts(struct ibtrs_heartbeat *h);
+void ibtrs_set_last_heartbeat(struct ibtrs_heartbeat *h);
+/* Read-only queries returning a u64: presumably elapsed milliseconds. */
+u64 ibtrs_last_heartbeat_diff_ms(const struct ibtrs_heartbeat *h);
+u64 ibtrs_heartbeat_send_ts_diff_ms(const struct ibtrs_heartbeat *h);
+
+/* Set the heartbeat timeout of @h to @timeout_ms. */
+void ibtrs_set_heartbeat_timeout(struct ibtrs_heartbeat *h, u32 timeout_ms);
+
+void ibtrs_heartbeat_warn(const struct ibtrs_heartbeat *h);
+
+bool ibtrs_heartbeat_timeout_is_expired(const struct ibtrs_heartbeat *h);
+
+/* Read-only queries returning a u32 delay; the unit is not visible here. */
+u32 ibtrs_heartbeat_get_send_delay(const struct ibtrs_heartbeat *h);
+u32 ibtrs_heartbeat_get_check_delay(const struct ibtrs_heartbeat *h);
+/*
+ * IU list helpers.
+ * NOTE(review): presumably ibtrs_iu_put() adds @iu to @iu_list and
+ * ibtrs_iu_get() removes and returns one entry (NULL when empty) - confirm.
+ */
+void ibtrs_iu_put(struct list_head *iu_list, struct ibtrs_iu *iu);
+struct ibtrs_iu *ibtrs_iu_get(struct list_head *iu_list);
+
+/*
+ * Allocate an IU.
+ * @tag:    tag to associate with the IU
+ * @size:   requested buffer size
+ * @t:      allocation flags
+ * @dev:    IB device
+ * @dir:    DMA data direction
+ * @is_msg: message flag to associate with the IU
+ *
+ * NOTE(review): presumably returns NULL on failure and the buffer is DMA
+ * mapped for @dev - confirm against the implementation.
+ */
+struct ibtrs_iu *ibtrs_iu_alloc(u32 tag, size_t size, gfp_t t,
+				struct ib_device *dev,
+				enum dma_data_direction dir, bool is_msg);
+
+/*
+ * Release @iu.
+ * NOTE(review): @dir and @dev are presumably needed to undo the DMA mapping
+ * created at allocation time and must match it - confirm.
+ */
+void ibtrs_iu_free(struct ibtrs_iu *iu, enum dma_data_direction dir,
+ struct ib_device *dev);
+
+/*
+ * Work request posting helpers operating on a queue pair. The
+ * implementations are not part of this header.
+ * NOTE(review): all of them presumably return 0 on success and a negative
+ * errno otherwise - confirm.
+ */
+
+/* NOTE(review): presumably posts a zero-length RDMA write carrying @imm_data. */
+int ibtrs_write_empty_imm(struct ib_qp *qp, u32 imm_data,
+ enum ib_send_flags flags);
+
+/* NOTE(review): presumably posts a send of the first @size bytes of @iu. */
+int ibtrs_post_send(struct ib_qp *qp, struct ib_mr *mr, struct ibtrs_iu *iu,
+ u32 size);
+
+/* RDMA write of @num_sge entries of @sge to @rdma_addr/@rkey, with immediate data. */
+int ib_post_rdma_write_imm(struct ib_qp *qp, struct ib_sge *sge,
+ unsigned int num_sge, u32 rkey, u64 rdma_addr,
+ u64 wr_id, u32 imm_data, enum ib_send_flags flags);
+
+/* RDMA write of @num_sge entries of @sge to @rdma_addr/@rkey, without immediate data. */
+int ib_post_rdma_write(struct ib_qp *qp, struct ib_sge *sge,
+ unsigned int num_sge, u32 rkey, u64 rdma_addr,
+ u64 wr_id);
+/* NOTE(review): presumably posts the beacon work request embedded in @con. */
+int post_beacon(struct ib_con *con);
+/**
+ * ib_session_init() - Create a new IB session
+ * @dev: IB device the session is created on
+ * @session: caller-provided &ib_session to initialize
+ *
+ * NOTE(review): presumably returns 0 on success otherwise a negative errno
+ * code, like ib_con_init() - confirm.
+ */
+int ib_session_init(struct ib_device *dev, struct ib_session *session);
+
+/**
+ * ib_con_init() - initialize and add a ib_con to the session
+ * @con: &ib_con to initialize
+ * @cm_id: RDMA CM identifier of the connection
+ * @max_send_sge: NOTE(review): presumably the maximum number of scatter/gather
+ *                entries per send work request - confirm
+ * @comp_handler: completion handler of the CQ
+ * @ctx: CQ context, returned to the user via completion handler
+ * @cq_vector: NOTE(review): presumably the completion vector of the CQ -
+ *             confirm
+ * @cq_size: NOTE(review): presumably the number of CQ entries - confirm
+ * @wr_queue_size: NOTE(review): presumably the work request queue size of the
+ *                 QP - confirm
+ * @session: session the &ib_con is added to
+ *
+ * Returns 0 on success otherwise a negative errno code
+ */
+int ib_con_init(struct ib_con *con, struct rdma_cm_id *cm_id,
+ u32 max_send_sge,
+ ib_comp_handler comp_handler, void *ctx, int cq_vector,
+ u16 cq_size, u16 wr_queue_size, struct ib_session *session);
+
+/*
+ * NOTE(review): presumably requests completion notifications on the CQ of
+ * @con and returns 0 or a negative errno - confirm.
+ */
+int ibtrs_request_cq_notifications(struct ib_con *con);
+
+/* Tear down @con; counterpart of ib_con_init(). */
+void ib_con_destroy(struct ib_con *con);
+
+/**
+ * ib_session_destroy() - Free a session
+ * @session: session to free
+ *
+ * The corresponding &ib_con must have been freed before.
+ */
+void ib_session_destroy(struct ib_session *session);
+
+/*
+ * NOTE(review): presumably returns the maximum work request queue size
+ * supported by @dev - confirm.
+ */
+int ib_get_max_wr_queue_size(struct ib_device *dev);
+
+/*
+ * Write a textual form of @addr into @buf, which is @len bytes long.
+ * NOTE(review): IBTRS_ADDRLEN looks like the intended buffer size; the
+ * meaning of the return value is not visible here - confirm.
+ */
+int ibtrs_addr_to_str(const struct sockaddr_storage *addr, char *buf,
+ size_t len);
+
+/*
+ * NOTE(review): presumably checks @timeout against the heartbeat limits
+ * such as MIN_HEARTBEAT_TIMEOUT_MS - confirm.
+ */
+int ibtrs_heartbeat_timeout_validate(int timeout);
+
+/**
+ * kvec_length() - Total number of bytes covered by a kvec array.
+ * @vec: first element of the array
+ * @nr: number of elements in @vec
+ *
+ * Returns the sum of iov_len over the first @nr elements, 0 when @nr is 0.
+ */
+static inline size_t kvec_length(const struct kvec *vec, size_t nr)
+{
+	size_t total = 0;
+
+	for (; nr; nr--, vec++)
+		total += vec->iov_len;
+
+	return total;
+}
+
+/**
+ * copy_from_kvec() - Gather bytes from a kvec array into a flat buffer.
+ * @data: destination buffer, at least @copy bytes long
+ * @vec: first element of the source array
+ * @copy: number of bytes to copy
+ *
+ * Segments are consumed in order until @copy bytes have been written. The
+ * number of segments is not checked, so the caller has to make sure that
+ * @vec covers at least @copy bytes.
+ */
+static inline void copy_from_kvec(void *data, const struct kvec *vec,
+				  size_t copy)
+{
+	char *dst = data;
+
+	while (copy) {
+		size_t chunk = min(vec->iov_len, copy);
+
+		memcpy(dst, vec->iov_base, chunk);
+		dst += chunk;
+		copy -= chunk;
+		vec++;
+	}
+}
+
+/**
+ * timespec_to_ms() - Convert a timespec to milliseconds.
+ * @ts: time value to convert
+ *
+ * Returns the nanosecond value of @ts divided by NSEC_PER_MSEC, truncated.
+ * div_s64() is used instead of a plain '/' because a 64-bit division
+ * operator is not available on all 32-bit kernel builds.
+ */
+static inline u64 timespec_to_ms(const struct timespec *ts)
+{
+	return div_s64(timespec_to_ns(ts), NSEC_PER_MSEC);
+}
+
+/*
+ * NOTE(review): presumably returns the number of milliseconds elapsed
+ * between @cur_ms and the current time - confirm.
+ */
+u64 timediff_cur_ms(u64 cur_ms);
+
+/*
+ * Allocation wrappers taking only a size.
+ * NOTE(review): presumably ibtrs_zalloc() returns zeroed memory and both
+ * return NULL on failure; the matching free routine is not visible in this
+ * header - confirm.
+ */
+void *ibtrs_malloc(size_t size);
+void *ibtrs_zalloc(size_t size);
+
+/*
+ * STAT_STORE_FUNC() - define the sysfs store handler <store>_store().
+ *
+ * The generated handler accepts "1" or "0" (compared with sysfs_streq())
+ * and calls reset(sess, true) or reset(sess, false) respectively, where
+ * sess is the ibtrs_session embedding @kobj as kobj_stats. Any other input
+ * yields -EINVAL, a non-zero result of reset() is returned as is, and
+ * @count is returned on success.
+ */
+#define STAT_STORE_FUNC(store, reset) \
+static ssize_t store##_store(struct kobject *kobj, \
+			     struct kobj_attribute *attr, \
+			     const char *buf, size_t count) \
+{ \
+	struct ibtrs_session *sess; \
+	bool enable; \
+	int ret; \
+\
+	if (sysfs_streq(buf, "1")) \
+		enable = true; \
+	else if (sysfs_streq(buf, "0")) \
+		enable = false; \
+	else \
+		return -EINVAL; \
+\
+	sess = container_of(kobj, struct ibtrs_session, kobj_stats); \
+	ret = reset(sess, enable); \
+	if (ret) \
+		return ret; \
+\
+	return count; \
+}
+
+/*
+ * STAT_SHOW_FUNC() - define the sysfs show handler <show>_show().
+ *
+ * The generated handler looks up the ibtrs_session embedding @kobj as
+ * kobj_stats and returns print(sess, page, PAGE_SIZE).
+ */
+#define STAT_SHOW_FUNC(show, print) \
+static ssize_t show##_show(struct kobject *kobj, \
+			   struct kobj_attribute *attr, \
+			   char *page) \
+{ \
+	struct ibtrs_session *sess; \
+\
+	sess = container_of(kobj, struct ibtrs_session, kobj_stats); \
+\
+	return print(sess, page, PAGE_SIZE); \
+}
+
+/*
+ * STAT_ATTR() - define a complete statistics attribute.
+ *
+ * Expands to the <stat>_store() and <stat>_show() handlers followed by a
+ * static kobj_attribute named <stat>_attr with mode 0644 that wires them
+ * together. The expansion does not end with a semicolon, so the user of
+ * the macro has to supply it.
+ */
+#define STAT_ATTR(stat, print, reset) \
+STAT_STORE_FUNC(stat, reset) \
+STAT_SHOW_FUNC(stat, print) \
+static struct kobj_attribute stat##_attr = \
+ __ATTR(stat, 0644, \
+ stat##_show, \
+ stat##_store)
+
+#endif /*__IBTRS_H*/