@@ -51,6 +51,8 @@ enum {
MLX5_CMD_OP_QUERY_ISSI = 0x10a,
MLX5_CMD_OP_SET_ISSI = 0x10b,
MLX5_CMD_OP_CREATE_MKEY = 0x200,
+ MLX5_CMD_OP_CREATE_EQ = 0x301,
+ MLX5_CMD_OP_DESTROY_EQ = 0x302,
MLX5_CMD_OP_CREATE_QP = 0x500,
MLX5_CMD_OP_RST2INIT_QP = 0x502,
MLX5_CMD_OP_INIT2RTR_QP = 0x503,
@@ -65,6 +67,8 @@ enum {
MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754,
MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755,
MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760,
+ MLX5_CMD_OP_ALLOC_UAR = 0x802,
+ MLX5_CMD_OP_DEALLOC_UAR = 0x803,
MLX5_CMD_OP_ACCESS_REG = 0x805,
MLX5_CMD_OP_QUERY_LAG = 0x842,
MLX5_CMD_OP_CREATE_TIR = 0x900,
@@ -118,6 +122,15 @@ enum {
MLX5_CAP_PORT_TYPE_ETH = 0x1,
};
+/*
+ * Event type codes. A code is compared against the EQE type field when
+ * events are dispatched, and is used as a bit position when building the
+ * event mask passed at EQ creation.
+ */
+enum mlx5_event {
+ MLX5_EVENT_TYPE_CMD = 0x0a,
+ MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb,
+};
+
+/*
+ * Byte offset added to the EQ UAR base address to obtain the EQ doorbell.
+ * NOTE(review): "DOORBEL" looks like a misspelling of DOORBELL; a rename must
+ * also update the user of this constant.
+ */
+enum {
+ MLX5_EQ_DOORBEL_OFFSET = 0x40,
+};
+
struct mlx5_ifc_atomic_caps_bits {
u8 reserved_at_0[0x40];
@@ -4434,4 +4447,141 @@ struct mlx5_ifc_set_hca_cap_in_bits {
union mlx5_ifc_hca_cap_union_bits capability;
};
+/*
+ * Output layout of the ALLOC_UAR command. Array sizes are field widths in
+ * bits; the reserved_at_N names give the bit offset of each gap.
+ */
+struct mlx5_ifc_alloc_uar_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x8];
+ u8 uar[0x18]; /* index of the allocated UAR */
+
+ u8 reserved_at_60[0x20];
+};
+
+/*
+ * Input layout of the ALLOC_UAR command (field widths in bits). Only the
+ * opcode is filled in by the caller in this file.
+ */
+struct mlx5_ifc_alloc_uar_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+};
+
+/* Output layout of the DEALLOC_UAR command (field widths in bits). */
+struct mlx5_ifc_dealloc_uar_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+/* Input layout of the DEALLOC_UAR command (field widths in bits). */
+struct mlx5_ifc_dealloc_uar_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x8];
+ u8 uar[0x18]; /* index of the UAR to release */
+
+ u8 reserved_at_60[0x20];
+};
+
+/*
+ * EQ context, embedded in the CREATE_EQ input as eq_context_entry. Array
+ * sizes are field widths in bits (0x200 bits in total). The EQ creation code
+ * in this patch fills log_eq_size, uar_page, intr and log_page_size and
+ * leaves the remaining fields zero.
+ */
+struct mlx5_ifc_eqc_bits {
+ u8 status[0x4];
+ u8 reserved_at_4[0x9];
+ u8 ec[0x1];
+ u8 oi[0x1];
+ u8 reserved_at_f[0x5];
+ u8 st[0x4];
+ u8 reserved_at_18[0x8];
+
+ u8 reserved_at_20[0x20];
+
+ u8 reserved_at_40[0x14];
+ u8 page_offset[0x6];
+ u8 reserved_at_5a[0x6];
+
+ u8 reserved_at_60[0x3];
+ u8 log_eq_size[0x5];
+ u8 uar_page[0x18];
+
+ u8 reserved_at_80[0x20];
+
+ u8 reserved_at_a0[0x18];
+ u8 intr[0x8];
+
+ u8 reserved_at_c0[0x3];
+ u8 log_page_size[0x5];
+ u8 reserved_at_c8[0x18];
+
+ u8 reserved_at_e0[0x60];
+
+ u8 reserved_at_140[0x8];
+ u8 consumer_counter[0x18];
+
+ u8 reserved_at_160[0x8];
+ u8 producer_counter[0x18];
+
+ u8 reserved_at_180[0x80];
+};
+
+/* Output layout of the CREATE_EQ command (field widths in bits). */
+struct mlx5_ifc_create_eq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x18];
+ u8 eq_number[0x8]; /* number assigned to the new EQ */
+
+ u8 reserved_at_60[0x20];
+};
+
+/*
+ * Input layout of the CREATE_EQ command (field widths in bits). The fixed
+ * part is followed by a variable number of 64-bit pas entries; the caller in
+ * this patch supplies exactly one, holding the IOVA of the EQ buffer.
+ */
+struct mlx5_ifc_create_eq_in_bits {
+ u8 opcode[0x10];
+ u8 uid[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x40];
+
+ struct mlx5_ifc_eqc_bits eq_context_entry;
+
+ u8 reserved_at_280[0x40];
+
+ u8 event_bitmask[4][0x40]; /* four 64-bit event mask words */
+
+ u8 reserved_at_3c0[0x4c0];
+
+ u8 pas[][0x40];
+};
+
+/* Output layout of the DESTROY_EQ command (field widths in bits). */
+struct mlx5_ifc_destroy_eq_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+/* Input layout of the DESTROY_EQ command (field widths in bits). */
+struct mlx5_ifc_destroy_eq_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x18];
+ u8 eq_number[0x8]; /* number of the EQ to destroy */
+
+ u8 reserved_at_60[0x20];
+};
+
#endif /* MLX5_IFC_H */
@@ -19,6 +19,7 @@
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
+#include <poll.h>
#include <util/mmio.h>
#include "mlx5dv.h"
@@ -26,6 +27,10 @@
#include "mlx5.h"
#include "mlx5_ifc.h"
+/*
+ * Interrupt vector indices. The command vector index selects the slot of the
+ * MSI-X eventfd array that receives ctx->cmd_comp_fd and is the irq_index
+ * requested for the async EQ.
+ */
+enum {
+ MLX5_VFIO_CMD_VEC_IDX,
+};
+
static void mlx5_vfio_free_cmd_msg(struct mlx5_vfio_context *ctx,
struct mlx5_cmd_msg *msg);
@@ -223,6 +228,37 @@ static const char *cmd_status_str(uint8_t status)
}
}
+/* Return the address of entry number @entry inside the EQ buffer of @eq. */
+static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, uint32_t entry)
+{
+	size_t byte_off = entry * MLX5_EQE_SIZE;
+
+	return eq->vaddr + byte_off;
+}
+
+/*
+ * Look at the entry @cc positions past the current consumer index.
+ *
+ * The entry is returned only when the low bit of its owner byte equals the
+ * (index & nent) bit of the running index; otherwise NULL is returned.
+ */
+static struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, uint32_t cc)
+{
+	uint32_t idx = eq->cons_index + cc;
+	struct mlx5_eqe *entry = get_eqe(eq, idx & (eq->nent - 1));
+	bool wrap_bit = idx & eq->nent;
+
+	if ((entry->owner & 1) != wrap_bit)
+		return NULL;
+
+	/* Entry accepted: issue the from-device barrier before the caller reads it */
+	udma_from_device_barrier();
+	return entry;
+}
+
+/*
+ * Advance the consumer index of @eq by @cc and write it to the EQ doorbell.
+ *
+ * @arm selects which doorbell word is written: the first word when non-zero,
+ * the word two 32-bit slots further on when zero. The written value is the
+ * low 24 bits of the consumer index with the EQ number in the top byte.
+ */
+static void eq_update_ci(struct mlx5_eq *eq, uint32_t cc, int arm)
+{
+	__be32 *addr = eq->doorbell + (arm ? 0 : 2);
+	uint32_t val;
+
+	eq->cons_index += cc;
+	/*
+	 * eqn is a uint8_t and would be promoted to int: widen it first so that
+	 * an EQ number >= 0x80 is not shifted into the sign bit (undefined).
+	 */
+	val = (eq->cons_index & 0xffffff) | ((uint32_t)eq->eqn << 24);
+
+	mmio_write32_be(addr, htobe32(val));
+	udma_to_device_barrier();
+}
+
static void mlx5_cmd_mbox_status(void *out, uint8_t *status, uint32_t *syndrome)
{
*status = DEVX_GET(mbox_out, out, status);
@@ -315,6 +351,85 @@ static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
return 0;
}
+/*
+ * The HCA will think the queue has overflowed if we don't tell it we've been
+ * processing events. EQs are created with MLX5_NUM_SPARE_EQE extra entries,
+ * so the consumer index must be updated at least that often.
+ *
+ * Returns the number of consumed entries still to be reported: @cc itself
+ * while below the threshold, 0 once the batch has been written out.
+ */
+static inline uint32_t mlx5_eq_update_cc(struct mlx5_eq *eq, uint32_t cc)
+{
+	if (unlikely(cc >= MLX5_NUM_SPARE_EQE)) {
+		/* Report the batch without arming */
+		eq_update_ci(eq, cc, 0);
+		return 0;
+	}
+
+	return cc;
+}
+
+/*
+ * Completion handler for a command slot: write a 64-bit 1 to the slot's
+ * completion event fd. Returns 0 on a full write, -1 otherwise.
+ */
+static int mlx5_vfio_cmd_comp(struct mlx5_vfio_context *ctx, unsigned long slot)
+{
+	uint64_t one = 1;
+	ssize_t written;
+
+	written = write(ctx->cmd.cmds[slot].completion_event_fd, &one,
+			sizeof(uint64_t));
+
+	return written == sizeof(uint64_t) ? 0 : -1;
+}
+
+/*
+ * Handle a command-completion EQE.
+ *
+ * The EQE carries a bit vector with one bit per completed command slot. For
+ * every set bit the slot's comp_func is invoked. Returns 0 when all reported
+ * slots were handled, or the first non-zero value returned by a comp_func.
+ */
+static int mlx5_vfio_process_cmd_eqe(struct mlx5_vfio_context *ctx,
+				     struct mlx5_eqe *eqe)
+{
+	struct mlx5_eqe_cmd *cmd_eqe = &eqe->data.cmd;
+	unsigned long vector = be32toh(cmd_eqe->vector);
+	unsigned long slot;
+	int count = 0;
+	int ret;
+
+	for (slot = 0; slot < MLX5_MAX_COMMANDS; slot++) {
+		/*
+		 * Build the mask in the vector's own type: an int "1 << slot"
+		 * is undefined once slot reaches the sign bit.
+		 */
+		unsigned long bit = 1UL << slot;
+
+		if (!(vector & bit))
+			continue;
+
+		assert(ctx->cmd.cmds[slot].comp_func);
+		ret = ctx->cmd.cmds[slot].comp_func(ctx, slot);
+		if (ret)
+			return ret;
+
+		vector &= ~bit;
+		count++;
+	}
+
+	/* Every reported bit must map to a slot, and at least one must be set */
+	assert(!vector && count);
+	return 0;
+}
+
+/*
+ * Drain the async EQ under ctx->eq_lock.
+ *
+ * Command events are dispatched to mlx5_vfio_process_cmd_eqe(); other event
+ * types are consumed without action. Processing stops at the first handler
+ * error. In all cases the consumer index is written back with the arm
+ * request before the lock is dropped. Returns 0 or the handler's error.
+ */
+static int mlx5_vfio_process_async_events(struct mlx5_vfio_context *ctx)
+{
+	struct mlx5_eq *eq = &ctx->async_eq;
+	struct mlx5_eqe *eqe;
+	uint32_t consumed = 0;
+	int ret = 0;
+
+	pthread_mutex_lock(&ctx->eq_lock);
+	for (;;) {
+		eqe = mlx5_eq_get_eqe(eq, consumed);
+		if (!eqe)
+			break;
+
+		if (eqe->type == MLX5_EVENT_TYPE_CMD)
+			ret = mlx5_vfio_process_cmd_eqe(ctx, eqe);
+
+		/* Count this entry, even when its handler failed */
+		consumed = mlx5_eq_update_cc(eq, consumed + 1);
+		if (ret)
+			break;
+	}
+
+	eq_update_ci(eq, consumed, 1);
+	pthread_mutex_unlock(&ctx->eq_lock);
+	return ret;
+}
+
static int mlx5_vfio_enlarge_cmd_msg(struct mlx5_vfio_context *ctx, struct mlx5_cmd_msg *cmd_msg,
struct mlx5_cmd_layout *cmd_lay, uint32_t len, bool is_in)
{
@@ -333,6 +448,49 @@ static int mlx5_vfio_enlarge_cmd_msg(struct mlx5_vfio_context *ctx, struct mlx5_
return 0;
}
+/*
+ * Block until the command in @slot has completed.
+ *
+ * Two descriptors are polled: the interrupt fd (ctx->cmd_comp_fd), whose
+ * readiness triggers processing of the async EQ, and the slot's completion
+ * event fd. The function returns 0 once the slot's fd has fired and the low
+ * bit of the command's status_own byte is clear. On failure it returns the
+ * errno of the failing call or the error from event processing.
+ */
+static int mlx5_vfio_wait_event(struct mlx5_vfio_context *ctx,
+				unsigned int slot)
+{
+	struct mlx5_cmd_layout *cmd_lay = ctx->cmd.cmds[slot].lay;
+	uint64_t u;
+	ssize_t s;
+	int err;
+
+	struct pollfd fds[2] = {
+		{ .fd = ctx->cmd_comp_fd, .events = POLLIN },
+		{ .fd = ctx->cmd.cmds[slot].completion_event_fd, .events = POLLIN }
+	};
+
+	while (true) {
+		err = poll(fds, 2, -1);
+		if (err < 0) {
+			/*
+			 * Interrupted or transient failure: revents is not
+			 * meaningful and the command is still in flight, so
+			 * wait again instead of acting on stale state.
+			 */
+			if (errno == EAGAIN || errno == EINTR)
+				continue;
+
+			/* Capture errno before logging can overwrite it */
+			err = errno;
+			mlx5_err(ctx->dbg_fp, "mlx5_vfio_wait_event, poll failed, errno=%d\n", err);
+			return err;
+		}
+		if (fds[0].revents & POLLIN) {
+			s = read(fds[0].fd, &u, sizeof(uint64_t));
+			if (s < 0 && errno != EAGAIN) {
+				err = errno;
+				mlx5_err(ctx->dbg_fp, "mlx5_vfio_wait_event, read failed, errno=%d\n", err);
+				return err;
+			}
+
+			err = mlx5_vfio_process_async_events(ctx);
+			if (err)
+				return err;
+		}
+		if (fds[1].revents & POLLIN) {
+			s = read(fds[1].fd, &u, sizeof(uint64_t));
+			if (s < 0 && errno != EAGAIN) {
+				err = errno;
+				mlx5_err(ctx->dbg_fp, "mlx5_vfio_wait_event, read failed, slot=%u, errno=%d\n",
+					 slot, err);
+				return err;
+			}
+			/* Done only once the ownership bit has been cleared */
+			if (!(mmio_read8(&cmd_lay->status_own) & 0x1))
+				return 0;
+		}
+	}
+}
+
/* One minute for the sake of bringup */
#define MLX5_CMD_TIMEOUT_MSEC (60 * 1000)
@@ -430,10 +588,17 @@ static int mlx5_vfio_cmd_exec(struct mlx5_vfio_context *ctx, void *in,
udma_to_device_barrier();
mmio_write32_be(&init_seg->cmd_dbell, htobe32(0x1 << slot));
- err = mlx5_vfio_poll_timeout(cmd_lay);
- if (err)
- goto end;
- udma_from_device_barrier();
+ if (ctx->have_eq) {
+ err = mlx5_vfio_wait_event(ctx, slot);
+ if (err)
+ goto end;
+ } else {
+ err = mlx5_vfio_poll_timeout(cmd_lay);
+ if (err)
+ goto end;
+ udma_from_device_barrier();
+ }
+
err = mlx5_copy_from_msg(out, cmd_out, olen, cmd_lay);
if (err)
goto end;
@@ -608,6 +773,9 @@ static int mlx5_vfio_setup_cmd_slot(struct mlx5_vfio_context *ctx, int slot)
goto err_fd;
}
+ if (slot != MLX5_MAX_COMMANDS - 1)
+ cmd_slot->comp_func = mlx5_vfio_cmd_comp;
+
pthread_mutex_init(&cmd_slot->lock, NULL);
return 0;
@@ -889,7 +1057,7 @@ mlx5_vfio_enable_msix(struct mlx5_vfio_context *ctx)
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
fd_ptr = (int *)&irq_set->data;
- fd_ptr[0] = ctx->cmd_comp_fd;
+ fd_ptr[MLX5_VFIO_CMD_VEC_IDX] = ctx->cmd_comp_fd;
return ioctl(ctx->device_fd, VFIO_DEVICE_SET_IRQS, irq_set);
}
@@ -907,7 +1075,7 @@ static int mlx5_vfio_init_async_fd(struct mlx5_vfio_context *ctx)
return -1;
/* set up an eventfd for command completion interrupts */
- ctx->cmd_comp_fd = eventfd(0, EFD_CLOEXEC);
+ ctx->cmd_comp_fd = eventfd(0, EFD_CLOEXEC | O_NONBLOCK);
if (ctx->cmd_comp_fd < 0)
return -1;
@@ -988,6 +1156,193 @@ close_cont:
return -1;
}
+/* Value written to the owner byte of every EQE when an EQ buffer is initialised. */
+enum {
+ MLX5_EQE_OWNER_INIT_VAL = 0x1,
+};
+
+/* Set the owner byte of each of the eq->nent entries to its initial value. */
+static void init_eq_buf(struct mlx5_eq *eq)
+{
+	int idx = 0;
+
+	while (idx < eq->nent) {
+		get_eqe(eq, idx)->owner = MLX5_EQE_OWNER_INIT_VAL;
+		idx++;
+	}
+}
+
+/*
+ * Return the address of UAR page @index, computed as ctx->bar_map plus
+ * @index adapter pages.
+ */
+static uint64_t uar2iova(struct mlx5_vfio_context *ctx, uint32_t index)
+{
+	/*
+	 * Widen before multiplying so the byte offset cannot wrap in 32 bits,
+	 * however MLX5_ADAPTER_PAGE_SIZE happens to be typed.
+	 */
+	uint64_t offset = (uint64_t)index * MLX5_ADAPTER_PAGE_SIZE;
+
+	/* Go through uintptr_t for a well-defined pointer-to-integer conversion */
+	return (uint64_t)(uintptr_t)((void *)ctx->bar_map + offset);
+}
+
+/*
+ * Issue ALLOC_UAR. On success the returned UAR index is stored in *uarn and
+ * 0 is returned; on failure *uarn is left untouched and the command error is
+ * returned.
+ */
+static int mlx5_vfio_alloc_uar(struct mlx5_vfio_context *ctx, uint32_t *uarn)
+{
+	uint32_t cmd_out[DEVX_ST_SZ_DW(alloc_uar_out)] = {};
+	uint32_t cmd_in[DEVX_ST_SZ_DW(alloc_uar_in)] = {};
+	int rc;
+
+	DEVX_SET(alloc_uar_in, cmd_in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+	rc = mlx5_vfio_cmd_exec(ctx, cmd_in, sizeof(cmd_in), cmd_out,
+				sizeof(cmd_out), 0);
+	if (rc)
+		return rc;
+
+	*uarn = DEVX_GET(alloc_uar_out, cmd_out, uar);
+	return 0;
+}
+
+/* Issue DEALLOC_UAR for UAR index @uarn; the command result is not checked. */
+static void mlx5_vfio_dealloc_uar(struct mlx5_vfio_context *ctx, uint32_t uarn)
+{
+	uint32_t cmd_out[DEVX_ST_SZ_DW(dealloc_uar_out)] = {};
+	uint32_t cmd_in[DEVX_ST_SZ_DW(dealloc_uar_in)] = {};
+
+	DEVX_SET(dealloc_uar_in, cmd_in, uar, uarn);
+	DEVX_SET(dealloc_uar_in, cmd_in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+	(void)mlx5_vfio_cmd_exec(ctx, cmd_in, sizeof(cmd_in), cmd_out,
+				 sizeof(cmd_out), 0);
+}
+
+/*
+ * Tear down @eq: issue DESTROY_EQ (result not checked), unregister the EQ
+ * buffer's IOVA range, hand the range back to the IOVA allocator and free
+ * the buffer.
+ */
+static void mlx5_vfio_destroy_eq(struct mlx5_vfio_context *ctx, struct mlx5_eq *eq)
+{
+	uint32_t cmd_out[DEVX_ST_SZ_DW(destroy_eq_out)] = {};
+	uint32_t cmd_in[DEVX_ST_SZ_DW(destroy_eq_in)] = {};
+
+	DEVX_SET(destroy_eq_in, cmd_in, eq_number, eq->eqn);
+	DEVX_SET(destroy_eq_in, cmd_in, opcode, MLX5_CMD_OP_DESTROY_EQ);
+	(void)mlx5_vfio_cmd_exec(ctx, cmd_in, sizeof(cmd_in), cmd_out,
+				 sizeof(cmd_out), 0);
+
+	/* Release in reverse order of acquisition */
+	mlx5_vfio_unregister_mem(ctx, eq->iova, eq->iova_size);
+	iset_insert_range(ctx->iova_alloc, eq->iova, eq->iova_size);
+	free(eq->vaddr);
+}
+
+/*
+ * Destroy the async EQ and release its UAR.
+ *
+ * have_eq is cleared first: command execution checks it to choose between
+ * waiting for an event and polling, so the teardown commands below do not
+ * wait on the EQ that is being destroyed.
+ */
+static void destroy_async_eqs(struct mlx5_vfio_context *ctx)
+{
+	struct mlx5_eq *eq = &ctx->async_eq;
+
+	ctx->have_eq = false;
+
+	mlx5_vfio_destroy_eq(ctx, eq);
+	mlx5_vfio_dealloc_uar(ctx, ctx->eqs_uar.uarn);
+}
+
+/*
+ * Allocate the buffer for @eq, register it for DMA and issue CREATE_EQ.
+ *
+ * @param supplies the requested entry count, the interrupt vector index and
+ * the four event mask words. On success 0 is returned and @eq holds the ring
+ * size, buffer, IOVA range, EQ number, vector index and doorbell address. On
+ * failure a non-zero error is returned and the buffer, IOVA range and DMA
+ * registration acquired here are released.
+ */
+static int
+create_map_eq(struct mlx5_vfio_context *ctx, struct mlx5_eq *eq,
+	      struct mlx5_eq_param *param)
+{
+	uint32_t out[DEVX_ST_SZ_DW(create_eq_out)] = {};
+	uint8_t vecidx = param->irq_index;
+	__be64 *pas;
+	void *eqc;
+	int inlen;
+	uint32_t *in;
+	int err;
+	int i;
+	int alloc_size;
+
+	pthread_mutex_init(&ctx->eq_lock, NULL);
+	/* Power-of-two ring: the consumer indexes it with "& (nent - 1)" */
+	eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
+	eq->cons_index = 0;
+	alloc_size = eq->nent * MLX5_EQE_SIZE;
+	eq->iova_size = max(roundup_pow_of_two(alloc_size), ctx->iova_min_page_size);
+
+	/* Fixed command layout followed by a single pas entry */
+	inlen = DEVX_ST_SZ_BYTES(create_eq_in) +
+		DEVX_FLD_SZ_BYTES(create_eq_in, pas[0]) * 1;
+
+	in = calloc(1, inlen);
+	if (!in)
+		return ENOMEM;
+
+	pas = (__be64 *)DEVX_ADDR_OF(create_eq_in, in, pas);
+
+	/*
+	 * Allocate the whole iova_size rather than alloc_size: iova_size bytes
+	 * starting at vaddr are registered below, so all of them must belong
+	 * to this allocation. iova_size exceeds alloc_size whenever the ring
+	 * is smaller than iova_min_page_size.
+	 */
+	err = posix_memalign(&eq->vaddr, eq->iova_size, eq->iova_size);
+	if (err) {
+		errno = err;
+		goto end;
+	}
+
+	err = iset_alloc_range(ctx->iova_alloc, eq->iova_size, &eq->iova);
+	if (err)
+		goto err_range;
+
+	err = mlx5_vfio_register_mem(ctx, eq->vaddr, eq->iova, eq->iova_size);
+	if (err)
+		goto err_reg;
+
+	pas[0] = htobe64(eq->iova);
+	init_eq_buf(eq);
+	DEVX_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
+
+	for (i = 0; i < 4; i++)
+		DEVX_ARRAY_SET64(create_eq_in, in, event_bitmask, i,
+				 param->mask[i]);
+
+	eqc = DEVX_ADDR_OF(create_eq_in, in, eq_context_entry);
+	DEVX_SET(eqc, eqc, log_eq_size, ilog32(eq->nent - 1));
+	DEVX_SET(eqc, eqc, uar_page, ctx->eqs_uar.uarn);
+	DEVX_SET(eqc, eqc, intr, vecidx);
+	DEVX_SET(eqc, eqc, log_page_size, ilog32(eq->iova_size - 1) - MLX5_ADAPTER_PAGE_SHIFT);
+
+	err = mlx5_vfio_cmd_exec(ctx, in, inlen, out, sizeof(out), 0);
+	if (err)
+		goto err_cmd;
+
+	eq->vecidx = vecidx;
+	eq->eqn = DEVX_GET(create_eq_out, out, eq_number);
+	eq->doorbell = (void *)ctx->eqs_uar.iova + MLX5_EQ_DOORBEL_OFFSET;
+
+	free(in);
+	return 0;
+
+err_cmd:
+	mlx5_vfio_unregister_mem(ctx, eq->iova, eq->iova_size);
+err_reg:
+	iset_insert_range(ctx->iova_alloc, eq->iova, eq->iova_size);
+err_range:
+	free(eq->vaddr);
+end:
+	free(in);
+	return err;
+}
+
+/*
+ * Create @eq from @param and, on success, write its doorbell with the arm
+ * request and an unchanged consumer index. Returns 0 or the creation error.
+ */
+static int
+setup_async_eq(struct mlx5_vfio_context *ctx, struct mlx5_eq_param *param,
+	       struct mlx5_eq *eq)
+{
+	int rc = create_map_eq(ctx, eq, param);
+
+	if (!rc)
+		eq_update_ci(eq, 0, 1);
+
+	return rc;
+}
+
+/*
+ * Allocate the UAR used for EQ doorbells and create the async EQ that
+ * receives command-completion events.
+ *
+ * On success ctx->have_eq is set, which switches command execution from
+ * polling to waiting for events, and 0 is returned. On failure the UAR is
+ * released and the error is returned.
+ */
+static int create_async_eqs(struct mlx5_vfio_context *ctx)
+{
+	struct mlx5_eq_param param = {
+		.irq_index = MLX5_VFIO_CMD_VEC_IDX,
+		.nent = MLX5_NUM_CMD_EQE,
+		/* Subscribe to command-completion events only */
+		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
+	};
+	int err;
+
+	err = mlx5_vfio_alloc_uar(ctx, &ctx->eqs_uar.uarn);
+	if (err)
+		return err;
+
+	ctx->eqs_uar.iova = uar2iova(ctx, ctx->eqs_uar.uarn);
+
+	/* The original text carried a mis-encoded "&param" argument here */
+	err = setup_async_eq(ctx, &param, &ctx->async_eq);
+	if (err)
+		goto err;
+
+	ctx->have_eq = true;
+	return 0;
+err:
+	mlx5_vfio_dealloc_uar(ctx, ctx->eqs_uar.uarn);
+	return err;
+}
+
static int mlx5_vfio_enable_hca(struct mlx5_vfio_context *ctx)
{
uint32_t in[DEVX_ST_SZ_DW(enable_hca_in)] = {};
@@ -1497,6 +1852,7 @@ static void mlx5_vfio_free_context(struct ibv_context *ibctx)
{
struct mlx5_vfio_context *ctx = to_mvfio_ctx(ibctx);
+ destroy_async_eqs(ctx);
mlx5_vfio_teardown_hca(ctx);
mlx5_vfio_clean_cmd_interface(ctx);
mlx5_vfio_clean_device_dma(ctx);
@@ -1541,9 +1897,14 @@ mlx5_vfio_alloc_context(struct ibv_device *ibdev,
if (mlx5_vfio_setup_function(mctx))
goto clean_cmd;
+ if (create_async_eqs(mctx))
+ goto func_teardown;
+
verbs_set_ops(&mctx->vctx, &mlx5_vfio_common_ops);
return &mctx->vctx;
+func_teardown:
+ mlx5_vfio_teardown_hca(mctx);
clean_cmd:
mlx5_vfio_clean_cmd_interface(mctx);
err_dma:
@@ -60,6 +60,8 @@ struct mlx5_vfio_device {
#define MLX5_VFIO_CAP_ROCE_MAX(ctx, cap) \
DEVX_GET(roce_cap, ctx->caps.hca_max[MLX5_CAP_ROCE], cap)
+/* Forward declaration: the slot completion callback type below takes a pointer to it. */
+struct mlx5_vfio_context;
+
struct mlx5_reg_host_endianness {
uint8_t he;
uint8_t rsvd[15];
@@ -149,12 +151,16 @@ struct mlx5_cmd_msg {
struct mlx5_cmd_mailbox *next;
};
+/*
+ * Per-slot command completion callback. It is called with the context and
+ * the slot number; a non-zero return stops processing of the command event.
+ */
+typedef int (*vfio_cmd_slot_comp)(struct mlx5_vfio_context *ctx,
+ unsigned long slot);
+
struct mlx5_vfio_cmd_slot {
struct mlx5_cmd_layout *lay;
struct mlx5_cmd_msg in;
struct mlx5_cmd_msg out;
pthread_mutex_t lock;
int completion_event_fd;
+ /* Called for this slot when a command EQE reports its bit as completed */
+ vfio_cmd_slot_comp comp_func;
};
struct mlx5_vfio_cmd {
@@ -165,6 +171,62 @@ struct mlx5_vfio_cmd {
struct mlx5_vfio_cmd_slot cmds[MLX5_MAX_COMMANDS];
};
+/* Parameters for creating an EQ. */
+struct mlx5_eq_param {
+ uint8_t irq_index; /* interrupt vector index programmed into the EQ context */
+ int nent; /* requested entries; spare entries are added on creation */
+ uint64_t mask[4]; /* event mask words copied into the CREATE_EQ command */
+};
+
+/* Software state of one event queue. */
+struct mlx5_eq {
+ __be32 *doorbell; /* doorbell address: EQ UAR base + MLX5_EQ_DOORBEL_OFFSET */
+ uint32_t cons_index; /* running consumer index; low 24 bits go to the doorbell */
+ unsigned int vecidx; /* interrupt vector index the EQ was created with */
+ uint8_t eqn; /* EQ number returned by CREATE_EQ */
+ int nent; /* number of entries, rounded up to a power of two */
+ void *vaddr; /* EQ buffer */
+ uint64_t iova; /* IOVA allocated for the buffer */
+ uint64_t iova_size; /* size of the IOVA range registered for the buffer */
+};
+
+/* Payload of a command-completion event. */
+struct mlx5_eqe_cmd {
+ __be32 vector; /* big-endian bit vector, one bit per completed command slot */
+ __be32 rsvd[6];
+};
+
+/*
+ * Payload of a page-request event.
+ * NOTE(review): no code visible in this patch reads these fields; confirm
+ * their meaning against the consumer that uses them.
+ */
+struct mlx5_eqe_page_req {
+ __be16 ec_function;
+ __be16 func_id;
+ __be32 num_pages;
+ __be32 rsvd1[5];
+};
+
+/* Event payload: seven 32-bit words, viewed raw or as a per-event-type structure. */
+union ev_data {
+ __be32 raw[7];
+ struct mlx5_eqe_cmd cmd;
+ struct mlx5_eqe_page_req req_pages;
+};
+
+/* Layout of one event queue entry; sizeof() of this struct is MLX5_EQE_SIZE. */
+struct mlx5_eqe {
+ uint8_t rsvd0;
+ uint8_t type; /* event type, compared against MLX5_EVENT_TYPE_* */
+ uint8_t rsvd1;
+ uint8_t sub_type;
+ __be32 rsvd2[7];
+ union ev_data data;
+ __be16 rsvd3;
+ uint8_t signature;
+ uint8_t owner; /* low bit is checked against the consumer's wrap bit */
+};
+
+/* Size in bytes of one EQ entry */
+#define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe))
+/* Entry count requested for the async (command) EQ */
+#define MLX5_NUM_CMD_EQE (32)
+/*
+ * Extra entries added to every EQ, and the number of consumed entries after
+ * which the consumer index is written back to the device
+ */
+#define MLX5_NUM_SPARE_EQE (0x80)
+
+/* UAR used for EQ doorbells. */
+struct mlx5_vfio_eqs_uar {
+ uint32_t uarn; /* UAR index returned by ALLOC_UAR */
+ uint64_t iova; /* address computed by uar2iova() for that index */
+};
+
struct mlx5_vfio_context {
struct verbs_context vctx;
int container_fd;
@@ -183,6 +245,9 @@ struct mlx5_vfio_context {
uint32_t hca_cur[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)];
uint32_t hca_max[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)];
} caps;
+ struct mlx5_eq async_eq;
+ struct mlx5_vfio_eqs_uar eqs_uar;
+ pthread_mutex_t eq_lock;
};
static inline struct mlx5_vfio_device *to_mvfio_dev(struct ibv_device *ibdev)
Enable interrupt-driven command completion over vfio by creating an event queue (EQ) together with the device resources it depends on (a UAR page for its doorbell and the MSI-X vector it signals). Signed-off-by: Yishai Hadas <yishaih@nvidia.com> --- providers/mlx5/mlx5_ifc.h | 150 ++++++++++++++++++ providers/mlx5/mlx5_vfio.c | 373 ++++++++++++++++++++++++++++++++++++++++++++- providers/mlx5/mlx5_vfio.h | 65 ++++++++ 3 files changed, 582 insertions(+), 6 deletions(-)