@@ -1,3 +1,8 @@
config VMW_PVRDMA
default y if PCI_DEVICES
depends on PVRDMA && PCI && MSI_NONBROKEN
+
+config VIRTIO_RDMA
+ bool
+ default y
+ depends on VIRTIO
@@ -8,3 +8,13 @@ specific_ss.add(when: 'CONFIG_VMW_PVRDMA', if_true: files(
'vmw/pvrdma_main.c',
'vmw/pvrdma_qp_ops.c',
))
+
+specific_ss.add(when: 'CONFIG_VIRTIO_RDMA', if_true: files(
+ 'rdma.c',
+ 'rdma_backend.c',
+ 'rdma_rm.c',
+ 'rdma_utils.c',
+ 'virtio/virtio-rdma-main.c',
+ 'virtio/virtio-rdma-ib.c',
+ 'virtio/virtio-rdma-qp.c',
+))
new file mode 100644
@@ -0,0 +1,269 @@
+/*
+ * Virtio RDMA Device - Device command API
+ *
+ * Copyright (C) 2021 Bytedance Inc.
+ *
+ * Authors:
+ * Junji Wei <weijunji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VIRTIO_RDMA_DEV_API_H
+#define VIRTIO_RDMA_DEV_API_H
+
+#include "virtio-rdma-ib.h"
+
+#define VIRTIO_RDMA_CTRL_OK 0
+#define VIRTIO_RDMA_CTRL_ERR 1
+
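+/*
+ * Commands issued by the guest driver on the control virtqueue. The
+ * numbering starts at 10 and must match the guest driver's copy of this
+ * enum; cmd_tbl[] in virtio-rdma-main.c is indexed by these values.
+ */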
+enum {
+ VIRTIO_CMD_QUERY_DEVICE = 10,
+ VIRTIO_CMD_QUERY_PORT,
+ VIRTIO_CMD_CREATE_CQ,
+ VIRTIO_CMD_DESTROY_CQ,
+ VIRTIO_CMD_CREATE_PD,
+ VIRTIO_CMD_DESTROY_PD,
+ VIRTIO_CMD_GET_DMA_MR,
+ VIRTIO_CMD_CREATE_MR,
+ VIRTIO_CMD_MAP_MR_SG,
+ VIRTIO_CMD_REG_USER_MR,
+ VIRTIO_CMD_DEREG_MR,
+ VIRTIO_CMD_CREATE_QP,
+ VIRTIO_CMD_MODIFY_QP,
+ VIRTIO_CMD_QUERY_QP,
+ VIRTIO_CMD_DESTROY_QP,
+ VIRTIO_CMD_QUERY_GID,
+ VIRTIO_CMD_CREATE_UC,
+ VIRTIO_CMD_DEALLOC_UC,
+ VIRTIO_CMD_QUERY_PKEY,
+ VIRTIO_MAX_CMD_NUM,
+};
+
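+/*
+ * Framing of a control-queue request, as consumed by
+ * virtio_rdma_handle_ctrl(): out_sg[0] carries the one-byte cmd,
+ * out_sg[1] the command-specific cmd_* struct; in_sg[0] receives the
+ * rsp_* struct and in_sg[1] the one-byte status
+ * (VIRTIO_RDMA_CTRL_OK/ERR).
+ */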
+struct control_buf {
+ uint8_t cmd;
+ uint8_t status;
+};
+
+struct cmd_query_port {
+ uint8_t port;
+};
+
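+/*
+ * Mirrors struct ibv_port_attr field for field, but with a fixed layout
+ * that is shared with the guest driver; virtio_rdma_query_port() copies
+ * it starting at 'state'.
+ */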
+struct virtio_rdma_port_attr {
+ enum ibv_port_state state;
+ enum ibv_mtu max_mtu;
+ enum ibv_mtu active_mtu;
+ int gid_tbl_len;
+ unsigned int ip_gids:1;
+ uint32_t port_cap_flags;
+ uint32_t max_msg_sz;
+ uint32_t bad_pkey_cntr;
+ uint32_t qkey_viol_cntr;
+ uint16_t pkey_tbl_len;
+ uint32_t sm_lid;
+ uint32_t lid;
+ uint8_t lmc;
+ uint8_t max_vl_num;
+ uint8_t sm_sl;
+ uint8_t subnet_timeout;
+ uint8_t init_type_reply;
+ uint8_t active_width;
+ uint8_t active_speed;
+ uint8_t phys_state;
+ uint16_t port_cap_flags2;
+};
+
+struct cmd_create_cq {
+ uint32_t cqe;
+};
+
+struct rsp_create_cq {
+ uint32_t cqn;
+};
+
+struct cmd_destroy_cq {
+ uint32_t cqn;
+};
+
+struct cmd_create_pd {
+ uint32_t ctx_handle;
+};
+
+struct rsp_create_pd {
+ uint32_t pdn;
+};
+
+struct cmd_destroy_pd {
+ uint32_t pdn;
+};
+
+struct cmd_create_mr {
+ uint32_t pdn;
+ uint32_t access_flags;
+
+ uint32_t max_num_sg;
+};
+
+struct rsp_create_mr {
+ uint32_t mrn;
+ uint32_t lkey;
+ uint32_t rkey;
+};
+
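+/*
+ * 'pages' holds the guest physical address of an array of page GPAs;
+ * virtio_rdma_map_mr_sg() maps that array with dma_memory_map() and then
+ * remaps each listed page into one host-contiguous region (see
+ * remap_pages() in virtio-rdma-ib.c).
+ */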
+struct cmd_map_mr_sg {
+ uint32_t mrn;
+ uint64_t start;
+ uint32_t npages;
+
+ uint64_t pages;
+};
+
+struct rsp_map_mr_sg {
+ uint32_t npages;
+};
+
+struct cmd_reg_user_mr {
+ uint32_t pdn;
+ uint32_t access_flags;
+ uint64_t start;
+ uint64_t length;
+
+ uint64_t pages;
+ uint32_t npages;
+};
+
+struct rsp_reg_user_mr {
+ uint32_t mrn;
+ uint32_t lkey;
+ uint32_t rkey;
+};
+
+struct cmd_dereg_mr {
+ uint32_t mrn;
+
+ uint8_t is_user_mr;
+};
+
+struct rsp_dereg_mr {
+ uint32_t mrn;
+};
+
+struct cmd_create_qp {
+ uint32_t pdn;
+ uint8_t qp_type;
+ uint32_t max_send_wr;
+ uint32_t max_send_sge;
+ uint32_t send_cqn;
+ uint32_t max_recv_wr;
+ uint32_t max_recv_sge;
+ uint32_t recv_cqn;
+ uint8_t is_srq;
+ uint32_t srq_handle;
+};
+
+struct rsp_create_qp {
+ uint32_t qpn;
+};
+
+struct cmd_modify_qp {
+ uint32_t qpn;
+ uint32_t attr_mask;
+ struct virtio_rdma_qp_attr attr;
+};
+
+struct cmd_destroy_qp {
+ uint32_t qpn;
+};
+
+struct rsp_destroy_qp {
+ uint32_t qpn;
+};
+
+struct cmd_query_qp {
+ uint32_t qpn;
+ uint32_t attr_mask;
+};
+
+struct rsp_query_qp {
+ struct virtio_rdma_qp_attr attr;
+};
+
+struct cmd_query_gid {
+ uint8_t port;
+ uint32_t index;
+};
+
+struct cmd_create_uc {
+ uint64_t pfn;
+};
+
+struct rsp_create_uc {
+ uint32_t ctx_handle;
+};
+
+struct cmd_dealloc_uc {
+ uint32_t ctx_handle;
+};
+
+struct rsp_dealloc_uc {
+ uint32_t ctx_handle;
+};
+
+struct cmd_query_pkey {
+    uint8_t port;
+    uint16_t index;
+};
+
+struct rsp_query_pkey {
+    uint16_t pkey;
+};
+
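+/*
+ * Work requests travel on the per-QP send/recv virtqueues rather than on
+ * the control queue: out_sg[0] holds cmd_post_send/cmd_post_recv,
+ * out_sg[1] an array of num_sge struct ibv_sge entries, and in_sg[0]
+ * returns a per-request int status (see virtio-rdma-qp.c).
+ */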
+struct cmd_post_send {
+    uint32_t qpn;
+    uint32_t is_kernel;
+    uint32_t num_sge;
+
+    int send_flags;
+    enum virtio_rdma_wr_opcode opcode;
+    uint64_t wr_id;
+
+    union {
+        uint32_t imm_data; /* big-endian on the wire */
+        uint32_t invalidate_rkey;
+    } ex;
+
+    union {
+        struct {
+            uint64_t remote_addr;
+            uint32_t rkey;
+        } rdma;
+        struct {
+            uint64_t remote_addr;
+            uint64_t compare_add;
+            uint64_t swap;
+            uint32_t rkey;
+        } atomic;
+        struct {
+            uint32_t remote_qpn;
+            uint32_t remote_qkey;
+            uint32_t ahn;
+        } ud;
+        struct {
+            uint32_t mrn;
+            uint32_t key;
+            int access;
+        } reg;
+    } wr;
+};
+
+struct cmd_post_recv {
+    uint32_t qpn;
+    uint32_t is_kernel;
+
+    uint32_t num_sge;
+    uint64_t wr_id;
+};
+
+#endif
new file mode 100644
@@ -0,0 +1,764 @@
+/*
+ * Virtio RDMA Device - IB verbs
+ *
+ * Copyright (C) 2019 Oracle
+ *
+ * Authors:
+ * Yuval Shaia <yuval.shaia@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/atomic.h"
+#include "cpu.h"
+
+#include <infiniband/verbs.h>
+#include <sys/mman.h>
+
+#include "virtio-rdma-ib.h"
+#include "virtio-rdma-qp.h"
+#include "virtio-rdma-dev-api.h"
+
+#include "../rdma_utils.h"
+#include "../rdma_rm.h"
+#include "../rdma_backend.h"
+
+int virtio_rdma_query_device(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ int offs;
+ size_t s;
+
+ addrconf_addr_eui48((unsigned char *)&rdev->dev_attr.sys_image_guid,
+ (const char *)&rdev->netdev->mac);
+
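+    /*
+     * The response starts at sys_image_guid; the fields before it in
+     * struct ibv_device_attr (fw_ver, node_guid) are not exposed to the
+     * guest.
+     */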
+ offs = offsetof(struct ibv_device_attr, sys_image_guid);
+    s = iov_from_buf(out, 1, 0, (void *)&rdev->dev_attr + offs,
+                     sizeof(rdev->dev_attr) - offs);
+
+ return s == sizeof(rdev->dev_attr) - offs ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+int virtio_rdma_query_port(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct virtio_rdma_port_attr attr = {};
+ struct ibv_port_attr vattr = {};
+ struct cmd_query_port cmd = {};
+ int offs;
+ size_t s;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ if (cmd.port != 1) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+    if (rdma_backend_query_port(rdev->backend_dev, &vattr)) {
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+ attr.state = vattr.state;
+ attr.max_mtu = vattr.max_mtu;
+ attr.active_mtu = vattr.active_mtu;
+ attr.gid_tbl_len = vattr.gid_tbl_len;
+ attr.port_cap_flags = vattr.port_cap_flags;
+ attr.max_msg_sz = vattr.max_msg_sz;
+ attr.bad_pkey_cntr = vattr.bad_pkey_cntr;
+ attr.qkey_viol_cntr = vattr.qkey_viol_cntr;
+ attr.pkey_tbl_len = vattr.pkey_tbl_len;
+ attr.lid = vattr.lid;
+ attr.sm_lid = vattr.sm_lid;
+ attr.lmc = vattr.lmc;
+ attr.max_vl_num = vattr.max_vl_num;
+ attr.sm_sl = vattr.sm_sl;
+ attr.subnet_timeout = vattr.subnet_timeout;
+ attr.init_type_reply = vattr.init_type_reply;
+ attr.active_width = vattr.active_width;
+    attr.active_speed = vattr.active_speed;
+ attr.phys_state = vattr.phys_state;
+ attr.port_cap_flags2 = vattr.port_cap_flags2;
+
+ offs = offsetof(struct virtio_rdma_port_attr, state);
+
+ s = iov_from_buf(out, 1, 0, (void *)&attr + offs, sizeof(attr) - offs);
+
+ return s == sizeof(attr) - offs ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+int virtio_rdma_create_cq(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_create_cq cmd = {};
+ struct rsp_create_cq rsp = {};
+ size_t s;
+ int rc;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ /* TODO: Define MAX_CQE */
+#define MAX_CQE 1024
+ /* TODO: Check MAX_CQ */
+ if (cmd.cqe > MAX_CQE) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ printf("%s: %d\n", __func__, cmd.cqe);
+
+ rc = rdma_rm_alloc_cq(rdev->rdma_dev_res, rdev->backend_dev, cmd.cqe,
+ &rsp.cqn, NULL);
+ if (rc)
+ return VIRTIO_RDMA_CTRL_ERR;
+
+ printf("%s: %d\n", __func__, rsp.cqn);
+
+ s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+ return s == sizeof(rsp) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+int virtio_rdma_destroy_cq(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_destroy_cq cmd = {};
+ size_t s;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ printf("%s: %d\n", __func__, cmd.cqn);
+
+ virtqueue_drop_all(rdev->cq_vqs[cmd.cqn]);
+ rdma_rm_dealloc_cq(rdev->rdma_dev_res, cmd.cqn);
+
+ return VIRTIO_RDMA_CTRL_OK;
+}
+
+int virtio_rdma_create_pd(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_create_pd cmd = {};
+ struct rsp_create_pd rsp = {};
+ size_t s;
+ int rc;
+
+    if (qatomic_inc_fetch(&rdev->num_pd) > rdev->dev_attr.max_pd) {
+        goto err;
+    }
+
+    s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+    if (s != sizeof(cmd)) {
+        goto err;
+    }
+
+    /* TODO: Check MAX_PD */
+
+    rc = rdma_rm_alloc_pd(rdev->rdma_dev_res, rdev->backend_dev, &rsp.pdn,
+                          cmd.ctx_handle);
+    if (rc) {
+        goto err;
+    }
+
+    rdma_info_report("%s: pdn %d num_pd %d", __func__, rsp.pdn,
+                     qatomic_read(&rdev->num_pd));
+
+    s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+    if (s == sizeof(rsp)) {
+        return VIRTIO_RDMA_CTRL_OK;
+    }
+
+err:
+ qatomic_dec(&rdev->num_pd);
+ return VIRTIO_RDMA_CTRL_ERR;
+}
+
+int virtio_rdma_destroy_pd(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_destroy_pd cmd = {};
+ size_t s;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ printf("%s: %d\n", __func__, cmd.pdn);
+
+ rdma_rm_dealloc_pd(rdev->rdma_dev_res, cmd.pdn);
+
+ return VIRTIO_RDMA_CTRL_OK;
+}
+
+int virtio_rdma_get_dma_mr(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_create_mr cmd = {};
+ struct rsp_create_mr rsp = {};
+ size_t s;
+ uint32_t *htbl_key;
+ struct virtio_rdma_kernel_mr *kernel_mr;
+
+    /* FIXME: how to support DMA MR */
+    rdma_warn_report("DMA MR is not supported yet");
+
+ htbl_key = g_malloc0(sizeof(*htbl_key));
+    if (htbl_key == NULL) {
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+ kernel_mr = g_malloc0(sizeof(*kernel_mr));
+ if (kernel_mr == NULL) {
+ g_free(htbl_key);
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ g_free(kernel_mr);
+ g_free(htbl_key);
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+    rdma_rm_alloc_mr(rdev->rdma_dev_res, cmd.pdn, 0, 0, NULL,
+                     cmd.access_flags, &rsp.mrn, &rsp.lkey, &rsp.rkey);
+
+ *htbl_key = rsp.lkey;
+ kernel_mr->dummy_mr = rdma_rm_get_mr(rdev->rdma_dev_res, rsp.mrn);
+ kernel_mr->max_num_sg = cmd.max_num_sg;
+ kernel_mr->real_mr = NULL;
+ kernel_mr->dma_mr = true;
+ g_hash_table_insert(rdev->lkey_mr_tbl, htbl_key, kernel_mr);
+
+ s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+ return s == sizeof(rsp) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
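+/*
+ * Kernel-mode MR registration is split in three steps: create_mr only
+ * registers a one-page dummy MR so the guest obtains a stable lkey,
+ * map_mr_sg() later mremap()s the guest pages into one host-contiguous
+ * range, and the real backend MR is registered when the
+ * VIRTIO_RDMA_WR_REG_MR work request arrives on the send queue (see
+ * virtio_rdma_handle_sq()).
+ */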
+int virtio_rdma_create_mr(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_create_mr cmd = {};
+ struct rsp_create_mr rsp = {};
+ size_t s;
+    void *map_addr;
+ uint32_t *htbl_key;
+ struct virtio_rdma_kernel_mr *kernel_mr;
+ RdmaRmMR *mr;
+
+ htbl_key = g_malloc0(sizeof(*htbl_key));
+    if (htbl_key == NULL) {
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+ kernel_mr = g_malloc0(sizeof(*kernel_mr));
+ if (kernel_mr == NULL) {
+ g_free(htbl_key);
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ g_free(kernel_mr);
+ g_free(htbl_key);
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+    /* when length is zero, the same lkey would be returned */
+    map_addr = mmap(0, TARGET_PAGE_SIZE, PROT_READ | PROT_WRITE,
+                    MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+    rdma_rm_alloc_mr(rdev->rdma_dev_res, cmd.pdn, (uint64_t)map_addr,
+                     TARGET_PAGE_SIZE, map_addr, cmd.access_flags,
+                     &rsp.mrn, &rsp.lkey, &rsp.rkey);
+    /* rkey is -1 because a kernel-mode MR cannot be accessed by remotes */
+
+    /*
+     * We need to build an lkey-to-MR map in order to set the local
+     * address in post_send and post_recv.
+     */
+ *htbl_key = rsp.lkey;
+ mr = rdma_rm_get_mr(rdev->rdma_dev_res, rsp.mrn);
+ mr->lkey = rsp.lkey;
+ kernel_mr->dummy_mr = mr;
+ kernel_mr->max_num_sg = cmd.max_num_sg;
+ kernel_mr->real_mr = NULL;
+ kernel_mr->dma_mr = false;
+ g_hash_table_insert(rdev->lkey_mr_tbl, htbl_key, kernel_mr);
+
+ s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+ return s == sizeof(rsp) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
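+/*
+ * Stitch the scattered guest pages listed in 'pages' into one
+ * host-contiguous VA range starting at remap_start by mremap()ing each
+ * page over the anonymous mapping reserved by the caller. Returns the
+ * number of pages actually remapped.
+ */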
+static int remap_pages(AddressSpace *as, uint64_t *pages,
+                       void *remap_start, int npages)
+{
+    int i;
+    void *addr;
+    void *curr_page;
+    dma_addr_t len = TARGET_PAGE_SIZE;
+
+    for (i = 0; i < npages; i++) {
+        rdma_info_report("remap page %" PRIx64 " to %p",
+                         pages[i], remap_start + TARGET_PAGE_SIZE * i);
+        curr_page = dma_memory_map(as, pages[i], &len,
+                                   DMA_DIRECTION_TO_DEVICE);
+        addr = mremap(curr_page, 0, TARGET_PAGE_SIZE,
+                      MREMAP_MAYMOVE | MREMAP_FIXED,
+                      remap_start + TARGET_PAGE_SIZE * i);
+        dma_memory_unmap(as, curr_page, TARGET_PAGE_SIZE,
+                         DMA_DIRECTION_TO_DEVICE, 0);
+        if (addr == MAP_FAILED) {
+            break;
+        }
+    }
+    return i;
+}
+
+int virtio_rdma_map_mr_sg(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_map_mr_sg cmd = {};
+ struct rsp_map_mr_sg rsp = {};
+ size_t s;
+ uint64_t *pages;
+ dma_addr_t len = TARGET_PAGE_SIZE;
+ RdmaRmMR *mr;
+ void *remap_addr;
+ AddressSpace *dma_as = VIRTIO_DEVICE(rdev)->dma_as;
+ struct virtio_rdma_kernel_mr *kmr;
+ uint32_t num_pages;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ mr = rdma_rm_get_mr(rdev->rdma_dev_res, cmd.mrn);
+    if (!mr) {
+        rdma_error_report("get mr failed");
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+ pages = dma_memory_map(dma_as, cmd.pages, &len, DMA_DIRECTION_TO_DEVICE);
+
+    kmr = g_hash_table_lookup(rdev->lkey_mr_tbl, &mr->lkey);
+    if (!kmr) {
+        rdma_error_report("Get kmr failed");
+        dma_memory_unmap(dma_as, pages, len, DMA_DIRECTION_TO_DEVICE, 0);
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+ num_pages = kmr->max_num_sg > cmd.npages ? cmd.npages : kmr->max_num_sg;
+    remap_addr = mmap(0, num_pages * TARGET_PAGE_SIZE,
+                      PROT_READ | PROT_WRITE,
+                      MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+
+ rsp.npages = remap_pages(dma_as, pages, remap_addr, num_pages);
+ dma_memory_unmap(dma_as, pages, len, DMA_DIRECTION_TO_DEVICE, 0);
+
+ // rdma_rm_alloc_mr(rdev->rdma_dev_res, mr->pd_handle, (uint64_t)remap_addr, num_pages * TARGET_PAGE_SIZE,
+ // remap_addr, IBV_ACCESS_LOCAL_WRITE, &kmr->mrn, &kmr->lkey, &kmr->rkey);
+
+ kmr->virt = remap_addr;
+ kmr->length = num_pages * TARGET_PAGE_SIZE;
+ kmr->start = cmd.start;
+ // kmr->real_mr = rdma_rm_get_mr(rdev->rdma_dev_res, kmr->mrn);
+
+ s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+ return s == sizeof(rsp) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+int virtio_rdma_reg_user_mr(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_reg_user_mr cmd = {};
+ struct rsp_reg_user_mr rsp = {};
+ size_t s;
+ uint64_t *pages;
+ dma_addr_t len = TARGET_PAGE_SIZE;
+ void *remap_addr, *curr_page;
+ AddressSpace *dma_as = VIRTIO_DEVICE(rdev)->dma_as;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ pages = dma_memory_map(dma_as, cmd.pages, &len, DMA_DIRECTION_TO_DEVICE);
+
+ curr_page = dma_memory_map(dma_as, pages[0], &len, DMA_DIRECTION_TO_DEVICE);
+    remap_addr = mremap(curr_page, 0, TARGET_PAGE_SIZE * cmd.npages,
+                        MREMAP_MAYMOVE);
+    dma_memory_unmap(dma_as, curr_page, TARGET_PAGE_SIZE,
+                     DMA_DIRECTION_TO_DEVICE, 0);
+    if (remap_addr == MAP_FAILED) {
+        rdma_error_report("mremap failed");
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+    remap_pages(dma_as, pages + 1, remap_addr + TARGET_PAGE_SIZE,
+                cmd.npages - 1);
+    dma_memory_unmap(dma_as, pages, len, DMA_DIRECTION_TO_DEVICE, 0);
+
+    rdma_rm_alloc_mr(rdev->rdma_dev_res, cmd.pdn, cmd.start,
+                     TARGET_PAGE_SIZE * cmd.npages, remap_addr,
+                     cmd.access_flags, &rsp.mrn, &rsp.lkey, &rsp.rkey);
+    rsp.rkey = rdma_backend_mr_rkey(
+                   &rdma_rm_get_mr(rdev->rdma_dev_res, rsp.mrn)->backend_mr);
+    rdma_info_report("%s: 0x%x", __func__, rsp.mrn);
+
+ s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+ return s == sizeof(rsp) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+int virtio_rdma_dereg_mr(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_dereg_mr cmd = {};
+ struct RdmaRmMR *mr;
+    struct virtio_rdma_kernel_mr *kmr = NULL;
+ size_t s;
+ uint32_t lkey;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ mr = rdma_rm_get_mr(rdev->rdma_dev_res, cmd.mrn);
+    if (!mr) {
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+ if (!cmd.is_user_mr) {
+ lkey = mr->lkey;
+ kmr = g_hash_table_lookup(rdev->lkey_mr_tbl, &lkey);
+        if (!kmr) {
+            return VIRTIO_RDMA_CTRL_ERR;
+        }
+        rdma_backend_destroy_mr(&kmr->dummy_mr->backend_mr);
+        mr = kmr->real_mr;
+        g_hash_table_remove(rdev->lkey_mr_tbl, &lkey);
+        if (!mr) {
+            g_free(kmr);
+            return VIRTIO_RDMA_CTRL_OK;
+        }
+ }
+
+ munmap(mr->virt, mr->length);
+ rdma_backend_destroy_mr(&mr->backend_mr);
+ g_free(kmr);
+ return VIRTIO_RDMA_CTRL_OK;
+}
+
+int virtio_rdma_create_qp(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_create_qp cmd = {};
+ struct rsp_create_qp rsp = {};
+ size_t s;
+ int rc;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+    /* TODO: check max qp */
+
+    rdma_info_report("%s: pdn %d qp type %d", __func__, cmd.pdn,
+                     cmd.qp_type);
+
+    /* store recv_cqn in opaque */
+    rc = rdma_rm_alloc_qp(rdev->rdma_dev_res, cmd.pdn, cmd.qp_type,
+                          cmd.max_send_wr, cmd.max_send_sge, cmd.send_cqn,
+                          cmd.max_recv_wr, cmd.max_recv_sge, cmd.recv_cqn,
+                          NULL, &rsp.qpn, cmd.is_srq, cmd.srq_handle);
+    if (rc) {
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+    rdma_info_report("%s: qpn %d", __func__, rsp.qpn);
+
+ s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+ return s == sizeof(rsp) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+static void virtio_rdma_ah_attr_to_ibv(struct virtio_rdma_ah_attr *ah_attr,
+                                       struct ibv_ah_attr *ibv_attr)
+{
+ ibv_attr->grh.dgid = ah_attr->grh.dgid;
+ ibv_attr->grh.flow_label = ah_attr->grh.flow_label;
+ ibv_attr->grh.sgid_index = ah_attr->grh.sgid_index;
+ ibv_attr->grh.hop_limit = ah_attr->grh.hop_limit;
+ ibv_attr->grh.traffic_class = ah_attr->grh.traffic_class;
+
+ ibv_attr->dlid = ah_attr->dlid;
+ ibv_attr->sl = ah_attr->sl;
+ ibv_attr->src_path_bits = ah_attr->src_path_bits;
+ ibv_attr->static_rate = ah_attr->static_rate;
+ ibv_attr->port_num = ah_attr->port_num;
+}
+
+int virtio_rdma_modify_qp(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_modify_qp cmd = {};
+ size_t s;
+ int rc;
+
+ RdmaRmQP *rqp;
+ struct ibv_qp_attr attr = {};
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ printf("%s: %d %d\n", __func__, cmd.qpn, cmd.attr.qp_state);
+
+ rqp = rdma_rm_get_qp(rdev->rdma_dev_res, cmd.qpn);
+ if (!rqp)
+ printf("Get qp failed\n");
+
+ if (rqp->qp_type == IBV_QPT_GSI) {
+ return VIRTIO_RDMA_CTRL_OK;
+ }
+
+    /* TODO: assign attr based on cmd.attr_mask */
+ attr.qp_state = cmd.attr.qp_state;
+ attr.cur_qp_state = cmd.attr.cur_qp_state;
+ attr.path_mtu = cmd.attr.path_mtu;
+ attr.path_mig_state = cmd.attr.path_mig_state;
+ attr.qkey = cmd.attr.qkey;
+ attr.rq_psn = cmd.attr.rq_psn;
+ attr.sq_psn = cmd.attr.sq_psn;
+ attr.dest_qp_num = cmd.attr.dest_qp_num;
+ attr.qp_access_flags = cmd.attr.qp_access_flags;
+ attr.pkey_index = cmd.attr.pkey_index;
+ attr.en_sqd_async_notify = cmd.attr.en_sqd_async_notify;
+ attr.sq_draining = cmd.attr.sq_draining;
+ attr.max_rd_atomic = cmd.attr.max_rd_atomic;
+ attr.max_dest_rd_atomic = cmd.attr.max_dest_rd_atomic;
+ attr.min_rnr_timer = cmd.attr.min_rnr_timer;
+ attr.port_num = cmd.attr.port_num;
+ attr.timeout = cmd.attr.timeout;
+ attr.retry_cnt = cmd.attr.retry_cnt;
+ attr.rnr_retry = cmd.attr.rnr_retry;
+ attr.alt_port_num = cmd.attr.alt_port_num;
+ attr.alt_timeout = cmd.attr.alt_timeout;
+ attr.rate_limit = cmd.attr.rate_limit;
+ attr.cap.max_inline_data = cmd.attr.cap.max_inline_data;
+ attr.cap.max_recv_sge = cmd.attr.cap.max_recv_sge;
+ attr.cap.max_recv_wr = cmd.attr.cap.max_recv_wr;
+ attr.cap.max_send_sge = cmd.attr.cap.max_send_sge;
+ attr.cap.max_send_wr = cmd.attr.cap.max_send_wr;
+ virtio_rdma_ah_attr_to_ibv(&cmd.attr.ah_attr, &attr.ah_attr);
+ virtio_rdma_ah_attr_to_ibv(&cmd.attr.alt_ah_attr, &attr.alt_ah_attr);
+
+ rqp->qp_state = cmd.attr.qp_state;
+
+ if (rqp->qp_state == IBV_QPS_RTR) {
+ rqp->backend_qp.sgid_idx = cmd.attr.ah_attr.grh.sgid_index;
+ attr.ah_attr.grh.sgid_index = cmd.attr.ah_attr.grh.sgid_index;
+ attr.ah_attr.is_global = 1;
+ }
+
+ printf("modify_qp_debug %d %d %d %d %d %d %d %d\n", cmd.qpn, cmd.attr_mask, cmd.attr.ah_attr.grh.sgid_index,
+ cmd.attr.dest_qp_num, cmd.attr.qp_state, cmd.attr.qkey, cmd.attr.rq_psn, cmd.attr.sq_psn);
+
+ rc = ibv_modify_qp(rqp->backend_qp.ibqp, &attr, cmd.attr_mask);
+ /*
+ rc = rdma_rm_modify_qp(rdev->rdma_dev_res, rdev->backend_dev,
+ cmd.qpn, cmd.attr_mask,
+ cmd.attr.ah_attr.grh.sgid_index,
+ &cmd.attr.ah_attr.grh.dgid,
+ cmd.attr.dest_qp_num,
+ (enum ibv_qp_state)cmd.attr.qp_state,
+ cmd.attr.qkey, cmd.attr.rq_psn,
+ cmd.attr.sq_psn);*/
+
+    if (rc) {
+        rdma_error_report("ibv_modify_qp fail, rc=%d, errno=%d", rc, errno);
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+    return VIRTIO_RDMA_CTRL_OK;
+}
+
+int virtio_rdma_query_qp(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_query_qp cmd = {};
+ struct rsp_query_qp rsp = {};
+ struct ibv_qp_init_attr init_attr;
+ size_t s;
+ int rc;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+    /*
+     * FIXME: struct virtio_rdma_qp_attr is not layout-compatible with
+     * struct ibv_qp_attr, so this cast is only safe for the leading
+     * fields; the attributes should be converted field by field.
+     */
+    rc = rdma_rm_query_qp(rdev->rdma_dev_res, rdev->backend_dev, cmd.qpn,
+                          (struct ibv_qp_attr *)&rsp.attr, cmd.attr_mask,
+                          &init_attr);
+    if (rc) {
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+ s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+ return s == sizeof(rsp) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+int virtio_rdma_destroy_qp(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_destroy_qp cmd = {};
+ size_t s;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ rdma_info_report("%s: %d", __func__, cmd.qpn);
+
+ rdma_rm_dealloc_qp(rdev->rdma_dev_res, cmd.qpn);
+
+ return VIRTIO_RDMA_CTRL_OK;
+}
+
+int virtio_rdma_query_gid(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_query_gid cmd = {};
+ union ibv_gid gid = {};
+ size_t s;
+ int rc;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ rc = ibv_query_gid(rdev->backend_dev->context, cmd.port, cmd.index,
+ &gid);
+    if (rc) {
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+ s = iov_from_buf(out, 1, 0, &gid, sizeof(gid));
+
+ return s == sizeof(gid) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+int virtio_rdma_create_uc(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_create_uc cmd = {};
+ struct rsp_create_uc rsp = {};
+ size_t s;
+ int rc;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+    rc = rdma_rm_alloc_uc(rdev->rdma_dev_res, cmd.pfn, &rsp.ctx_handle);
+    if (rc) {
+        return VIRTIO_RDMA_CTRL_ERR;
+    }
+
+ s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+ return s == sizeof(rsp) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+int virtio_rdma_dealloc_uc(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_dealloc_uc cmd = {};
+ size_t s;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ rdma_rm_dealloc_uc(rdev->rdma_dev_res, cmd.ctx_handle);
+
+ return VIRTIO_RDMA_CTRL_OK;
+}
+
+int virtio_rdma_query_pkey(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out)
+{
+ struct cmd_query_pkey cmd = {};
+ struct rsp_query_pkey rsp = {};
+ size_t s;
+
+ s = iov_to_buf(in, 1, 0, &cmd, sizeof(cmd));
+ if (s != sizeof(cmd)) {
+ return VIRTIO_RDMA_CTRL_ERR;
+ }
+
+ rsp.pkey = 0xFFFF;
+
+ s = iov_from_buf(out, 1, 0, &rsp, sizeof(rsp));
+
+ return s == sizeof(rsp) ? VIRTIO_RDMA_CTRL_OK :
+ VIRTIO_RDMA_CTRL_ERR;
+}
+
+static void virtio_rdma_init_dev_caps(VirtIORdma *rdev)
+{
+ rdev->dev_attr.max_qp_wr = 1024;
+}
+
+int virtio_rdma_init_ib(VirtIORdma *rdev)
+{
+ int rc;
+
+ virtio_rdma_init_dev_caps(rdev);
+
+ rdev->rdma_dev_res = g_malloc0(sizeof(RdmaDeviceResources));
+ rdev->backend_dev = g_malloc0(sizeof(RdmaBackendDev));
+
+ rc = rdma_backend_init(rdev->backend_dev, NULL, rdev->rdma_dev_res,
+ rdev->backend_device_name,
+ rdev->backend_port_num, &rdev->dev_attr,
+ &rdev->mad_chr);
+ if (rc) {
+ rdma_error_report("Fail to initialize backend device");
+ return rc;
+ }
+
+ rdev->dev_attr.max_mr_size = 4096;
+ rdev->dev_attr.page_size_cap = 4096;
+ rdev->dev_attr.vendor_id = 1;
+ rdev->dev_attr.vendor_part_id = 1;
+ rdev->dev_attr.hw_ver = VIRTIO_RDMA_HW_VER;
+ rdev->dev_attr.atomic_cap = IBV_ATOMIC_NONE;
+ rdev->dev_attr.max_pkeys = 1;
+ rdev->dev_attr.phys_port_cnt = VIRTIO_RDMA_PORT_CNT;
+
+ rc = rdma_rm_init(rdev->rdma_dev_res, &rdev->dev_attr);
+ if (rc) {
+ rdma_error_report("Fail to initialize resource manager");
+ return rc;
+ }
+
+ virtio_rdma_qp_ops_init();
+
+ rdma_backend_start(rdev->backend_dev);
+
+ return 0;
+}
+
+void virtio_rdma_fini_ib(VirtIORdma *rdev)
+{
+ rdma_backend_stop(rdev->backend_dev);
+ virtio_rdma_qp_ops_fini();
+ rdma_rm_fini(rdev->rdma_dev_res, rdev->backend_dev,
+ rdev->backend_eth_device_name);
+ rdma_backend_fini(rdev->backend_dev);
+ g_free(rdev->rdma_dev_res);
+ g_free(rdev->backend_dev);
+}
new file mode 100644
@@ -0,0 +1,176 @@
+/*
+ * Virtio RDMA Device - IB verbs
+ *
+ * Copyright (C) 2019 Oracle
+ *
+ * Authors:
+ * Yuval Shaia <yuval.shaia@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VIRTIO_RDMA_IB_H
+#define VIRTIO_RDMA_IB_H
+
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "hw/virtio/virtio-rdma.h"
+
+#include "../rdma_rm.h"
+
+enum virtio_rdma_wr_opcode {
+ VIRTIO_RDMA_WR_RDMA_WRITE,
+ VIRTIO_RDMA_WR_RDMA_WRITE_WITH_IMM,
+ VIRTIO_RDMA_WR_SEND,
+ VIRTIO_RDMA_WR_SEND_WITH_IMM,
+ VIRTIO_RDMA_WR_RDMA_READ,
+ VIRTIO_RDMA_WR_ATOMIC_CMP_AND_SWP,
+ VIRTIO_RDMA_WR_ATOMIC_FETCH_AND_ADD,
+ VIRTIO_RDMA_WR_LOCAL_INV,
+ VIRTIO_RDMA_WR_BIND_MW,
+ VIRTIO_RDMA_WR_SEND_WITH_INV,
+ VIRTIO_RDMA_WR_TSO,
+ VIRTIO_RDMA_WR_DRIVER1,
+
+ VIRTIO_RDMA_WR_REG_MR = 0x20,
+};
+
+struct virtio_rdma_cqe {
+ uint64_t wr_id;
+ enum ibv_wc_status status;
+ enum ibv_wc_opcode opcode;
+ uint32_t vendor_err;
+ uint32_t byte_len;
+ uint32_t imm_data;
+ uint32_t qp_num;
+ uint32_t src_qp;
+ int wc_flags;
+ uint16_t pkey_index;
+ uint16_t slid;
+ uint8_t sl;
+ uint8_t dlid_path_bits;
+};
+
+struct CompHandlerCtx {
+ VirtIORdma *dev;
+ uint32_t cq_handle;
+ struct virtio_rdma_cqe cqe;
+};
+
+struct virtio_rdma_kernel_mr {
+    RdmaRmMR *dummy_mr; /* created by create_mr */
+    RdmaRmMR *real_mr;  /* real MR created by map_mr_sg */
+
+    void *virt;
+ uint64_t length;
+ uint64_t start;
+ uint32_t mrn;
+ uint32_t lkey;
+ uint32_t rkey;
+
+ uint32_t max_num_sg;
+ uint8_t dma_mr;
+};
+
+struct virtio_rdma_global_route {
+ union ibv_gid dgid;
+ uint32_t flow_label;
+ uint8_t sgid_index;
+ uint8_t hop_limit;
+ uint8_t traffic_class;
+};
+
+struct virtio_rdma_ah_attr {
+ struct virtio_rdma_global_route grh;
+ uint16_t dlid;
+ uint8_t sl;
+ uint8_t src_path_bits;
+ uint8_t static_rate;
+ uint8_t port_num;
+};
+
+struct virtio_rdma_qp_cap {
+ uint32_t max_send_wr;
+ uint32_t max_recv_wr;
+ uint32_t max_send_sge;
+ uint32_t max_recv_sge;
+ uint32_t max_inline_data;
+};
+
+struct virtio_rdma_qp_attr {
+ enum ibv_qp_state qp_state;
+ enum ibv_qp_state cur_qp_state;
+ enum ibv_mtu path_mtu;
+ enum ibv_mig_state path_mig_state;
+ uint32_t qkey;
+ uint32_t rq_psn;
+ uint32_t sq_psn;
+ uint32_t dest_qp_num;
+ uint32_t qp_access_flags;
+ uint16_t pkey_index;
+ uint16_t alt_pkey_index;
+ uint8_t en_sqd_async_notify;
+ uint8_t sq_draining;
+ uint8_t max_rd_atomic;
+ uint8_t max_dest_rd_atomic;
+ uint8_t min_rnr_timer;
+ uint8_t port_num;
+ uint8_t timeout;
+ uint8_t retry_cnt;
+ uint8_t rnr_retry;
+ uint8_t alt_port_num;
+ uint8_t alt_timeout;
+ uint32_t rate_limit;
+ struct virtio_rdma_qp_cap cap;
+ struct virtio_rdma_ah_attr ah_attr;
+ struct virtio_rdma_ah_attr alt_ah_attr;
+};
+
+#define VIRTIO_RDMA_PORT_CNT 1
+#define VIRTIO_RDMA_HW_VER 1
+
+int virtio_rdma_init_ib(VirtIORdma *rdev);
+void virtio_rdma_fini_ib(VirtIORdma *rdev);
+
+int virtio_rdma_query_device(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_query_port(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_create_cq(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_destroy_cq(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_create_pd(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_destroy_pd(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_get_dma_mr(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_create_mr(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_reg_user_mr(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_create_qp(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_modify_qp(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_query_qp(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_query_gid(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_destroy_qp(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_map_mr_sg(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_dereg_mr(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_create_uc(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_query_pkey(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+int virtio_rdma_dealloc_uc(VirtIORdma *rdev, struct iovec *in,
+ struct iovec *out);
+
+#endif
new file mode 100644
@@ -0,0 +1,231 @@
+/*
+ * Virtio RDMA Device
+ *
+ * Copyright (C) 2019 Oracle
+ *
+ * Authors:
+ * Yuval Shaia <yuval.shaia@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include <infiniband/verbs.h>
+#include <unistd.h>
+
+#include "hw/virtio/virtio.h"
+#include "qemu/error-report.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-rdma.h"
+#include "hw/qdev-properties.h"
+#include "standard-headers/linux/virtio_ids.h"
+
+#include "virtio-rdma-ib.h"
+#include "virtio-rdma-qp.h"
+#include "virtio-rdma-dev-api.h"
+
+#include "../rdma_rm_defs.h"
+#include "../rdma_utils.h"
+
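+/*
+ * Dispatch table for the control virtqueue, indexed by VIRTIO_CMD_*
+ * value; slots below VIRTIO_CMD_QUERY_DEVICE (10) stay NULL and are
+ * rejected by the handler check in virtio_rdma_handle_ctrl().
+ */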
+#define DEFINE_VIRTIO_RDMA_CMD(cmd, handler) [cmd] = {handler, #cmd},
+
+struct {
+ int (*handler)(VirtIORdma *rdev, struct iovec *in, struct iovec *out);
+    const char *name;
+} cmd_tbl[] = {
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_QUERY_DEVICE, virtio_rdma_query_device)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_QUERY_PORT, virtio_rdma_query_port)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_CREATE_CQ, virtio_rdma_create_cq)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_DESTROY_CQ, virtio_rdma_destroy_cq)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_CREATE_PD, virtio_rdma_create_pd)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_DESTROY_PD, virtio_rdma_destroy_pd)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_GET_DMA_MR, virtio_rdma_get_dma_mr)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_CREATE_MR, virtio_rdma_create_mr)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_MAP_MR_SG, virtio_rdma_map_mr_sg)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_REG_USER_MR, virtio_rdma_reg_user_mr)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_DEREG_MR, virtio_rdma_dereg_mr)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_CREATE_QP, virtio_rdma_create_qp)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_MODIFY_QP, virtio_rdma_modify_qp)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_QUERY_QP, virtio_rdma_query_qp)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_DESTROY_QP, virtio_rdma_destroy_qp)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_QUERY_GID, virtio_rdma_query_gid)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_CREATE_UC, virtio_rdma_create_uc)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_DEALLOC_UC, virtio_rdma_dealloc_uc)
+ DEFINE_VIRTIO_RDMA_CMD(VIRTIO_CMD_QUERY_PKEY, virtio_rdma_query_pkey)
+};
+
+static void virtio_rdma_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIORdma *r = VIRTIO_RDMA(vdev);
+ struct control_buf cb;
+ VirtQueueElement *e;
+ size_t s;
+
+ virtio_queue_set_notification(vq, 0);
+
+ for (;;) {
+ e = virtqueue_pop(vq, sizeof(VirtQueueElement));
+ if (!e) {
+ break;
+ }
+
+ if (iov_size(e->in_sg, e->in_num) < sizeof(cb.status) ||
+ iov_size(e->out_sg, e->out_num) < sizeof(cb.cmd)) {
+ virtio_error(vdev, "Got invalid message size");
+ virtqueue_detach_element(vq, e, 0);
+ g_free(e);
+ break;
+ }
+
+ s = iov_to_buf(&e->out_sg[0], 1, 0, &cb.cmd, sizeof(cb.cmd));
+ if (s != sizeof(cb.cmd)) {
+ cb.status = VIRTIO_RDMA_CTRL_ERR;
+ } else {
+ printf("cmd=%d %s\n", cb.cmd, cmd_tbl[cb.cmd].name);
+ if (cb.cmd >= VIRTIO_MAX_CMD_NUM) {
+ rdma_warn_report("unknown cmd %d\n", cb.cmd);
+ cb.status = VIRTIO_RDMA_CTRL_ERR;
+ } else {
+ if (cmd_tbl[cb.cmd].handler) {
+ cb.status = cmd_tbl[cb.cmd].handler(r, &e->out_sg[1],
+ &e->in_sg[0]);
+ } else {
+ rdma_warn_report("no handler for cmd %d\n", cb.cmd);
+ cb.status = VIRTIO_RDMA_CTRL_ERR;
+ }
+ }
+ }
+ printf("status=%d\n", cb.status);
+ s = iov_from_buf(&e->in_sg[1], 1, 0, &cb.status, sizeof(cb.status));
+ assert(s == sizeof(cb.status));
+
+ virtqueue_push(vq, e, sizeof(cb.status));
+ g_free(e);
+ virtio_notify(vdev, vq);
+ }
+
+ virtio_queue_set_notification(vq, 1);
+}
+
+static void g_free_destroy(gpointer data)
+{
+    g_free(data);
+}
+
+static void virtio_rdma_device_realize(DeviceState *dev, Error **errp)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VirtIORdma *r = VIRTIO_RDMA(dev);
+ int rc, i;
+
+ rc = virtio_rdma_init_ib(r);
+ if (rc) {
+ rdma_error_report("Fail to initialize IB layer");
+ return;
+ }
+
+ virtio_init(vdev, "virtio-rdma", VIRTIO_ID_RDMA, 1024);
+
+    r->lkey_mr_tbl = g_hash_table_new_full(g_int_hash, g_int_equal,
+                                           g_free_destroy, NULL);
+
+ r->ctrl_vq = virtio_add_queue(vdev, 64, virtio_rdma_handle_ctrl);
+
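+    /*
+     * Virtqueue layout (fixed for now): queue 0 is the control queue,
+     * queues 1..64 are per-CQ completion queues, and the remaining 64
+     * pairs are per-QP send/recv queues.
+     */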
+ r->cq_vqs = g_malloc0_n(64, sizeof(*r->cq_vqs));
+ for (i = 0; i < 64; i++) {
+ r->cq_vqs[i] = virtio_add_queue(vdev, 64, NULL);
+ }
+
+    r->qp_vqs = g_malloc0_n(64 * 2, sizeof(*r->qp_vqs));
+ for (i = 0; i < 64 * 2; i += 2) {
+ r->qp_vqs[i] = virtio_add_queue(vdev, 64, virtio_rdma_handle_sq);
+ r->qp_vqs[i+1] = virtio_add_queue(vdev, 64, virtio_rdma_handle_rq);
+ }
+}
+
+static void virtio_rdma_device_unrealize(DeviceState *dev)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+ VirtIORdma *r = VIRTIO_RDMA(dev);
+
+ virtio_del_queue(vdev, 0);
+
+ virtio_cleanup(vdev);
+
+ virtio_rdma_fini_ib(r);
+}
+
+static uint64_t virtio_rdma_get_features(VirtIODevice *vdev, uint64_t features,
+ Error **errp)
+{
+ /* virtio_add_feature(&features, VIRTIO_NET_F_MAC); */
+
+ vdev->backend_features = features;
+
+ return features;
+}
+
+
+static Property virtio_rdma_dev_properties[] = {
+ DEFINE_PROP_STRING("netdev", VirtIORdma, backend_eth_device_name),
+ DEFINE_PROP_STRING("ibdev",VirtIORdma, backend_device_name),
+ DEFINE_PROP_UINT8("ibport", VirtIORdma, backend_port_num, 1),
+ DEFINE_PROP_UINT64("dev-caps-max-mr-size", VirtIORdma, dev_attr.max_mr_size,
+ MAX_MR_SIZE),
+ DEFINE_PROP_INT32("dev-caps-max-qp", VirtIORdma, dev_attr.max_qp, MAX_QP),
+ DEFINE_PROP_INT32("dev-caps-max-cq", VirtIORdma, dev_attr.max_cq, MAX_CQ),
+ DEFINE_PROP_INT32("dev-caps-max-mr", VirtIORdma, dev_attr.max_mr, MAX_MR),
+ DEFINE_PROP_INT32("dev-caps-max-pd", VirtIORdma, dev_attr.max_pd, MAX_PD),
+ DEFINE_PROP_INT32("dev-caps-qp-rd-atom", VirtIORdma,
+ dev_attr.max_qp_rd_atom, MAX_QP_RD_ATOM),
+ DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", VirtIORdma,
+ dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
+ DEFINE_PROP_INT32("dev-caps-max-ah", VirtIORdma, dev_attr.max_ah, MAX_AH),
+ DEFINE_PROP_INT32("dev-caps-max-srq", VirtIORdma, dev_attr.max_srq, MAX_SRQ),
+ DEFINE_PROP_CHR("mad-chardev", VirtIORdma, mad_chr),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+struct virtio_rdma_config {
+ int32_t max_cq;
+};
+
+static void virtio_rdma_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+ VirtIORdma *r = VIRTIO_RDMA(vdev);
+ struct virtio_rdma_config cfg;
+
+ cfg.max_cq = r->dev_attr.max_cq;
+
+ memcpy(config, &cfg, sizeof(cfg));
+}
+
+static void virtio_rdma_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+
+ set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+ vdc->realize = virtio_rdma_device_realize;
+ vdc->unrealize = virtio_rdma_device_unrealize;
+ vdc->get_features = virtio_rdma_get_features;
+ vdc->get_config = virtio_rdma_get_config;
+
+ dc->desc = "Virtio RDMA Device";
+ device_class_set_props(dc, virtio_rdma_dev_properties);
+ set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+}
+
+static const TypeInfo virtio_rdma_info = {
+ .name = TYPE_VIRTIO_RDMA,
+ .parent = TYPE_VIRTIO_DEVICE,
+ .instance_size = sizeof(VirtIORdma),
+ .class_init = virtio_rdma_class_init,
+};
+
+static void virtio_register_types(void)
+{
+ type_register_static(&virtio_rdma_info);
+}
+
+type_init(virtio_register_types)
new file mode 100644
@@ -0,0 +1,241 @@
+/*
+ * Virtio RDMA Device - QP ops
+ *
+ * Copyright (C) 2021 Bytedance Inc.
+ *
+ * Authors:
+ * Junji Wei <weijunji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/atomic.h"
+#include "cpu.h"
+
+#include <infiniband/verbs.h>
+
+#include "virtio-rdma-ib.h"
+#include "virtio-rdma-qp.h"
+#include "virtio-rdma-dev-api.h"
+
+#include "../rdma_utils.h"
+#include "../rdma_rm.h"
+#include "../rdma_backend.h"
+
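+/*
+ * Completion path: invoked by the rdma_backend completion thread for
+ * every ibv_wc. Translates the work completion into a virtio_rdma_cqe,
+ * pushes it onto the virtqueue of the CQ recorded in CompHandlerCtx and
+ * notifies the guest.
+ */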
+void virtio_rdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc)
+{
+ VirtQueueElement *e;
+ VirtQueue *vq;
+ struct CompHandlerCtx *comp_ctx = (struct CompHandlerCtx *)ctx;
+ size_t s;
+ struct virtio_rdma_cqe* cqe;
+
+ vq = comp_ctx->dev->cq_vqs[comp_ctx->cq_handle];
+    e = virtqueue_pop(vq, sizeof(VirtQueueElement));
+    if (!e) {
+        rdma_error_report("pop cq vq failed");
+        g_free(comp_ctx);
+        return;
+    }
+
+ cqe = &comp_ctx->cqe;
+ cqe->status = wc->status;
+ cqe->opcode = wc->opcode;
+ cqe->vendor_err = wc->vendor_err;
+ cqe->byte_len = wc->byte_len;
+ cqe->imm_data = wc->imm_data;
+ cqe->src_qp = wc->src_qp;
+ cqe->wc_flags = wc->wc_flags;
+ cqe->pkey_index = wc->pkey_index;
+ cqe->slid = wc->slid;
+ cqe->sl = wc->sl;
+ cqe->dlid_path_bits = wc->dlid_path_bits;
+
+ s = iov_from_buf(&e->in_sg[0], 1, 0, &comp_ctx->cqe, sizeof(comp_ctx->cqe));
+ assert(s == sizeof(comp_ctx->cqe));
+ virtqueue_push(vq, e, sizeof(comp_ctx->cqe));
+
+ virtio_notify(&comp_ctx->dev->parent_obj, vq);
+
+ g_free(e);
+ g_free(comp_ctx);
+}
+
+void virtio_rdma_qp_ops_fini(void)
+{
+ rdma_backend_unregister_comp_handler();
+}
+
+int virtio_rdma_qp_ops_init(void)
+{
+ rdma_backend_register_comp_handler(virtio_rdma_qp_ops_comp_handler);
+
+ return 0;
+}
+
+void virtio_rdma_handle_sq(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIORdma *dev = VIRTIO_RDMA(vdev);
+ VirtQueueElement *e;
+ struct cmd_post_send cmd;
+ struct ibv_sge *sge;
+ RdmaRmQP *qp;
+ struct virtio_rdma_kernel_mr *kmr;
+ size_t s;
+ int status = 0, i;
+ struct CompHandlerCtx *comp_ctx;
+
+ RdmaRmMR *mr;
+ uint32_t lkey;
+ uint32_t *htbl_key;
+
+ for (;;) {
+ e = virtqueue_pop(vq, sizeof(VirtQueueElement));
+ if (!e) {
+ break;
+ }
+
+ s = iov_to_buf(&e->out_sg[0], 1, 0, &cmd, sizeof(cmd));
+        if (s != sizeof(cmd)) {
+            rdma_error_report("bad cmd");
+            g_free(e);
+            break;
+        }
+
+        qp = rdma_rm_get_qp(dev->rdma_dev_res, cmd.qpn);
+        if (!qp) {
+            rdma_error_report("Get qp failed");
+            g_free(e);
+            break;
+        }
+
+ sge = g_malloc0_n(cmd.num_sge, sizeof(*sge));
+        s = iov_to_buf(&e->out_sg[1], 1, 0, sge, cmd.num_sge * sizeof(*sge));
+        if (s != cmd.num_sge * sizeof(*sge)) {
+            rdma_error_report("bad sge");
+            g_free(sge);
+            g_free(e);
+            break;
+        }
+
+ if (cmd.is_kernel) {
+ if (cmd.opcode == VIRTIO_RDMA_WR_REG_MR) {
+ mr = rdma_rm_get_mr(dev->rdma_dev_res, cmd.wr.reg.mrn);
+ lkey = mr->lkey;
+ kmr = g_hash_table_lookup(dev->lkey_mr_tbl, &lkey);
+                rdma_rm_alloc_mr(dev->rdma_dev_res, mr->pd_handle,
+                                 (uint64_t)kmr->virt, kmr->length, kmr->virt,
+                                 cmd.wr.reg.access, &kmr->mrn, &kmr->lkey,
+                                 &kmr->rkey);
+                kmr->real_mr = rdma_rm_get_mr(dev->rdma_dev_res, kmr->mrn);
+                if (cmd.wr.reg.key != mr->lkey) {
+                    /* rebuild the lkey -> kmr mapping */
+                    g_hash_table_remove(dev->lkey_mr_tbl, &lkey);
+
+                    htbl_key = g_malloc0(sizeof(*htbl_key));
+                    *htbl_key = cmd.wr.reg.key;
+
+                    g_hash_table_insert(dev->lkey_mr_tbl, htbl_key, kmr);
+                }
+ goto fin;
+ }
+        /*
+         * In kernel mode, we need to map guest addresses to the
+         * remapped host addresses.
+         */
+        for (i = 0; i < cmd.num_sge; i++) {
+            kmr = g_hash_table_lookup(dev->lkey_mr_tbl, &sge[i].lkey);
+            if (!kmr) {
+                rdma_error_report("Cannot find mr with lkey %u",
+                                  sge[i].lkey);
+                /* TODO: handle this error */
+            }
+            sge[i].addr = (uint64_t)kmr->virt + (sge[i].addr - kmr->start);
+            sge[i].lkey = kmr->lkey;
+        }
+ }
+        /* TODO: fill the work request according to the opcode */
+
+ /* Prepare CQE */
+ comp_ctx = g_malloc(sizeof(*comp_ctx));
+ comp_ctx->dev = dev;
+ comp_ctx->cq_handle = qp->send_cq_handle;
+ comp_ctx->cqe.wr_id = cmd.wr_id;
+ comp_ctx->cqe.qp_num = cmd.qpn;
+ comp_ctx->cqe.opcode = IBV_WC_SEND;
+
+        rdma_backend_post_send(dev->backend_dev, &qp->backend_qp, qp->qp_type,
+                               sge, cmd.num_sge, 0, NULL, NULL, 0, 0,
+                               comp_ctx);
+
+fin:
+ s = iov_from_buf(&e->in_sg[0], 1, 0, &status, sizeof(status));
+        if (s != sizeof(status)) {
+            break;
+        }
+
+ virtqueue_push(vq, e, sizeof(status));
+ g_free(e);
+ g_free(sge);
+ virtio_notify(vdev, vq);
+ }
+}
+
+void virtio_rdma_handle_rq(VirtIODevice *vdev, VirtQueue *vq)
+{
+ VirtIORdma *dev = VIRTIO_RDMA(vdev);
+ VirtQueueElement *e;
+ struct cmd_post_recv cmd;
+    struct ibv_sge *sge = NULL;
+ RdmaRmQP *qp;
+ struct virtio_rdma_kernel_mr *kmr;
+ size_t s;
+ int i, status = 0;
+ struct CompHandlerCtx *comp_ctx;
+
+ for (;;) {
+ e = virtqueue_pop(vq, sizeof(VirtQueueElement));
+        if (!e) {
+            break;
+        }
+
+        s = iov_to_buf(&e->out_sg[0], 1, 0, &cmd, sizeof(cmd));
+        if (s != sizeof(cmd)) {
+            rdma_error_report("bad cmd");
+            g_free(e);
+            break;
+        }
+
+        qp = rdma_rm_get_qp(dev->rdma_dev_res, cmd.qpn);
+        if (!qp) {
+            rdma_error_report("Get qp failed");
+            g_free(e);
+            break;
+        }
+
+        if (!qp->backend_qp.ibqp) {
+            if (qp->qp_type == IBV_QPT_SMI) {
+                rdma_error_report("Not support SMI");
+            }
+            if (qp->qp_type == IBV_QPT_GSI) {
+                rdma_warn_report("Not support GSI now");
+            }
+            goto end;
+        }
+
+        sge = g_malloc0_n(cmd.num_sge, sizeof(*sge));
+        s = iov_to_buf(&e->out_sg[1], 1, 0, sge, cmd.num_sge * sizeof(*sge));
+        if (s != cmd.num_sge * sizeof(*sge)) {
+            rdma_error_report("bad sge");
+            g_free(sge);
+            g_free(e);
+            break;
+        }
+
+ if (cmd.is_kernel) {
+            /*
+             * In kernel mode, we need to map guest addresses to the
+             * remapped host addresses.
+             */
+            for (i = 0; i < cmd.num_sge; i++) {
+                kmr = g_hash_table_lookup(dev->lkey_mr_tbl, &sge[i].lkey);
+                if (!kmr) {
+                    rdma_error_report("Cannot find mr with lkey %u",
+                                      sge[i].lkey);
+                    /* TODO: handle this error */
+ }
+ sge[i].addr = (uint64_t) kmr->virt + (sge[i].addr - kmr->start);
+ sge[i].lkey = kmr->lkey;
+ }
+ }
+
+ comp_ctx = g_malloc(sizeof(*comp_ctx));
+ comp_ctx->dev = dev;
+ comp_ctx->cq_handle = qp->recv_cq_handle;
+ comp_ctx->cqe.wr_id = cmd.wr_id;
+ comp_ctx->cqe.qp_num = cmd.qpn;
+ comp_ctx->cqe.opcode = IBV_WC_RECV;
+
+        rdma_backend_post_recv(dev->backend_dev, &qp->backend_qp, qp->qp_type,
+                               sge, cmd.num_sge, comp_ctx);
+
+end:
+ s = iov_from_buf(&e->in_sg[0], 1, 0, &status, sizeof(status));
+        if (s != sizeof(status)) {
+            break;
+        }
+
+        virtqueue_push(vq, e, sizeof(status));
+        g_free(e);
+        g_free(sge);
+        sge = NULL; /* the 'end' path may be taken before sge is reallocated */
+ virtio_notify(vdev, vq);
+ }
+}
new file mode 100644
@@ -0,0 +1,29 @@
+/*
+ * Virtio RDMA Device - QP ops
+ *
+ * Copyright (C) 2021 Bytedance Inc.
+ *
+ * Authors:
+ * Junji Wei <weijunji@bytedance.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef VIRTIO_RDMA_QP_H
+#define VIRTIO_RDMA_QP_H
+
+#include "qemu/osdep.h"
+#include "qemu/iov.h"
+#include "hw/virtio/virtio-rdma.h"
+
+#include "../rdma_rm.h"
+
+void virtio_rdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc);
+void virtio_rdma_qp_ops_fini(void);
+int virtio_rdma_qp_ops_init(void);
+void virtio_rdma_handle_sq(VirtIODevice *vdev, VirtQueue *vq);
+void virtio_rdma_handle_rq(VirtIODevice *vdev, VirtQueue *vq);
+
+#endif
@@ -41,6 +41,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SCSI', if_true: files('virtio-scsi-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('virtio-net-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_RDMA', if_true: files('virtio-rdma-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-serial-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu-pci.c'))
new file mode 100644
@@ -0,0 +1,110 @@
+/*
+ * Virtio rdma PCI Bindings
+ *
+ * Copyright (C) 2019 Oracle
+ *
+ * Authors:
+ * Yuval Shaia <yuval.shaia@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-net-pci.h"
+#include "hw/virtio/virtio-rdma.h"
+#include "virtio-pci.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+
+typedef struct VirtIORdmaPCI VirtIORdmaPCI;
+
+/*
+ * virtio-rdma-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_RDMA_PCI "virtio-rdma-pci-base"
+#define VIRTIO_RDMA_PCI(obj) \
+ OBJECT_CHECK(VirtIORdmaPCI, (obj), TYPE_VIRTIO_RDMA_PCI)
+
+struct VirtIORdmaPCI {
+ VirtIOPCIProxy parent_obj;
+ VirtIORdma vdev;
+};
+
+static Property virtio_rdma_properties[] = {
+ DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
+ VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
+ DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
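+/*
+ * The device must sit at a non-zero function of a slot whose function 0
+ * is a virtio-net-pci device; that net device's MAC is used to derive
+ * the node GUID. A minimal sketch of an invocation (the netdev id,
+ * ibdev name and slot number are illustrative only):
+ *
+ *   -device virtio-net-pci,netdev=net0,multifunction=on,addr=0x5.0x0 \
+ *   -device virtio-rdma-pci,ibdev=rxe0,netdev=eth0,addr=0x5.0x1
+ */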
+static void virtio_rdma_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
+{
+ VirtIORdmaPCI *dev = VIRTIO_RDMA_PCI(vpci_dev);
+ DeviceState *vdev = DEVICE(&dev->vdev);
+ VirtIONetPCI *vnet_pci;
+ PCIDevice *func0;
+
+ qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus), errp);
+ object_property_set_bool(OBJECT(vdev), "realized", true, errp);
+
+ func0 = pci_get_function_0(&vpci_dev->pci_dev);
+    /* Fail unless function 0 of this slot is a virtio-net device */
+    if (strcmp(object_get_typename(OBJECT(func0)),
+               TYPE_VIRTIO_NET_PCI_GENERIC)) {
+        error_setg(errp, "Device on %x.0 is type %s but must be %s",
+                   PCI_SLOT(vpci_dev->pci_dev.devfn),
+                   object_get_typename(OBJECT(func0)),
+                   TYPE_VIRTIO_NET_PCI_GENERIC);
+        return;
+    }
+ vnet_pci = VIRTIO_NET_PCI(func0);
+ dev->vdev.netdev = &vnet_pci->vdev;
+}
+
+static void virtio_rdma_pci_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+ VirtioPCIClass *vpciklass = VIRTIO_PCI_CLASS(klass);
+
+ k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+ k->device_id = PCI_DEVICE_ID_VIRTIO_RDMA;
+ k->revision = VIRTIO_PCI_ABI_VERSION;
+ k->class_id = PCI_CLASS_NETWORK_OTHER;
+ set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
+ device_class_set_props(dc, virtio_rdma_properties);
+ vpciklass->realize = virtio_rdma_pci_realize;
+}
+
+static void virtio_rdma_pci_instance_init(Object *obj)
+{
+ VirtIORdmaPCI *dev = VIRTIO_RDMA_PCI(obj);
+
+ virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+ TYPE_VIRTIO_RDMA);
+ /*
+ object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
+ "bootindex", &error_abort);
+ */
+}
+
+static const VirtioPCIDeviceTypeInfo virtio_rdma_pci_info = {
+ .base_name = TYPE_VIRTIO_RDMA_PCI,
+ .generic_name = "virtio-rdma-pci",
+ .transitional_name = "virtio-rdma-pci-transitional",
+ .non_transitional_name = "virtio-rdma-pci-non-transitional",
+ .instance_size = sizeof(VirtIORdmaPCI),
+ .instance_init = virtio_rdma_pci_instance_init,
+ .class_init = virtio_rdma_pci_class_init,
+};
+
+static void virtio_rdma_pci_register(void)
+{
+ virtio_pci_types_register(&virtio_rdma_pci_info);
+}
+
+type_init(virtio_rdma_pci_register)
@@ -89,6 +89,7 @@ extern bool pci_available;
#define PCI_DEVICE_ID_VIRTIO_PMEM 0x1013
#define PCI_DEVICE_ID_VIRTIO_IOMMU 0x1014
#define PCI_DEVICE_ID_VIRTIO_MEM 0x1015
+#define PCI_DEVICE_ID_VIRTIO_RDMA 0x1016
#define PCI_VENDOR_ID_REDHAT 0x1b36
#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001
new file mode 100644
@@ -0,0 +1,58 @@
+/*
+ * Virtio RDMA Device
+ *
+ * Copyright (C) 2019 Oracle
+ *
+ * Authors:
+ * Yuval Shaia <yuval.shaia@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_VIRTIO_RDMA_H
+#define QEMU_VIRTIO_RDMA_H
+
+#include <glib.h>
+#include <infiniband/verbs.h>
+
+#include "chardev/char-fe.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-net.h"
+
+#define TYPE_VIRTIO_RDMA "virtio-rdma-device"
+#define VIRTIO_RDMA(obj) \
+ OBJECT_CHECK(VirtIORdma, (obj), TYPE_VIRTIO_RDMA)
+
+typedef struct RdmaBackendDev RdmaBackendDev;
+typedef struct RdmaDeviceResources RdmaDeviceResources;
+struct ibv_device_attr;
+
+typedef struct VirtIORdma {
+ VirtIODevice parent_obj;
+ VirtQueue *ctrl_vq;
+ VirtIONet *netdev;
+ RdmaBackendDev *backend_dev;
+ RdmaDeviceResources *rdma_dev_res;
+ CharBackend mad_chr;
+ char *backend_eth_device_name;
+ char *backend_device_name;
+ uint8_t backend_port_num;
+ struct ibv_device_attr dev_attr;
+
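+    /* one queue per CQ, then a send/recv queue pair per QP (see realize) */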
+ VirtQueue **cq_vqs;
+ VirtQueue **qp_vqs;
+
+ GHashTable *lkey_mr_tbl;
+
+    /* active-object statistics to enforce limits; update with qatomic ops */
+ int num_qp;
+ int num_cq;
+ int num_pd;
+ int num_mr;
+ int num_srq;
+ int num_ctx;
+} VirtIORdma;
+
+#endif
@@ -48,5 +48,6 @@
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_RDMA 30 /* virtio rdma */
#endif /* _LINUX_VIRTIO_IDS_H */