@@ -1,6 +1,8 @@
libmlx4.so.1 ibverbs-providers #MINVER#
MLX4_1.0@MLX4_1.0 15
mlx4dv_init_obj@MLX4_1.0 15
+ mlx4dv_query_device@MLX4_1.0 15
+ mlx4dv_create_qp@MLX4_1.0 15
libmlx5.so.1 ibverbs-providers #MINVER#
MLX5_1.0@MLX5_1.0 13
MLX5_1.1@MLX5_1.1 14
@@ -3,5 +3,7 @@
MLX4_1.0 {
global:
mlx4dv_init_obj;
+ mlx4dv_query_device;
+ mlx4dv_create_qp;
local: *;
};
@@ -1,4 +1,5 @@
rdma_man_pages(
mlx4dv_init_obj.3
+ mlx4dv_query_device.3
mlx4dv.7
)
new file mode 100644
@@ -0,0 +1,42 @@
+.\" -*- nroff -*-
+.\" Licensed under the OpenIB.org (MIT) - See COPYING.md
+.\"
+.TH MLX4DV_QUERY_DEVICE 3 2017-06-27 1.0.0
+.SH "NAME"
+mlx4dv_query_device \- Query device capabilities specific to mlx4
+.SH "SYNOPSIS"
+.nf
+.B #include <infiniband/mlx4dv.h>
+.sp
+.BI "int mlx4dv_query_device(struct ibv_context *ctx_in,
+.BI " struct mlx4dv_context *attrs_out);
+.fi
+.SH "DESCRIPTION"
+.B mlx4dv_query_device()
+Queries HW device-specific information which is important for the data path, but isn't provided by
+\fBibv_query_device\fR(3).
+.PP
+This function returns the version and the compatibility mask. The version represents the format
+of the internal hardware structures that mlx4dv.h represents. Additions of new fields to the existing
+structures are handled by the comp_mask field.
+.PP
+.nf
+struct mlx4dv_context {
+.in +8
+uint8_t version;
+uint32_t max_inl_recv_sz; /* Maximum supported size of inline receive */
+uint64_t comp_mask;
+.in -8
+};
+
+.fi
+.SH "RETURN VALUE"
+0 on success or the value of errno on failure (which indicates the failure reason).
+.SH "NOTES"
+ * Compatibility mask (comp_mask) is an in/out field.
+.SH "SEE ALSO"
+.BR mlx4dv (7),
+.BR ibv_query_device (3)
+.SH "AUTHORS"
+.TP
+Maor Gottlieb <maorg@mellanox.com>
@@ -104,6 +104,8 @@ struct mlx4_query_device_ex_resp {
__u32 comp_mask;
__u32 response_length;
__u64 hca_core_clock_offset;
+ __u32 max_inl_recv_sz;
+ __u32 reserved;
};
struct mlx4_query_device_ex {
@@ -135,7 +137,8 @@ struct mlx4_create_qp {
__u8 log_sq_bb_count;
__u8 log_sq_stride;
__u8 sq_no_prefetch; /* was reserved in ABI 2 */
- __u8 reserved[5];
+ __u8 reserved;
+ __u32 inl_recv_sz;
};
struct mlx4_create_qp_drv_ex {
@@ -427,3 +427,16 @@ int mlx4dv_init_obj(struct mlx4dv_obj *obj, uint64_t obj_type)
return ret;
}
+
+int mlx4dv_query_device(struct ibv_context *ctx_in,
+ struct mlx4dv_context *attrs_out)
+{
+ struct mlx4_context *mctx = to_mctx(ctx_in);
+
+ attrs_out->version = 0;
+ attrs_out->comp_mask = 0;
+
+ attrs_out->max_inl_recv_sz = mctx->max_inl_recv_sz;
+
+ return 0;
+}
@@ -133,6 +133,7 @@ struct mlx4_context {
uint8_t offset_valid;
} core_clock;
void *hca_core_clock;
+ uint32_t max_inl_recv_sz;
};
struct mlx4_buf {
@@ -385,7 +386,8 @@ int mlx4_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
struct mlx4_qp *qp);
int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
- enum ibv_qp_type type, struct mlx4_qp *qp);
+ enum ibv_qp_type type, struct mlx4_qp *qp,
+ struct mlx4dv_qp_init_attr *mlx4qp_attr);
void mlx4_set_sq_sizes(struct mlx4_qp *qp, struct ibv_qp_cap *cap,
enum ibv_qp_type type);
struct mlx4_qp *mlx4_find_qp(struct mlx4_context *ctx, uint32_t qpn);
@@ -379,6 +379,29 @@ struct mlx4_wqe_atomic_seg {
__be64 compare;
};
+enum mlx4dv_qp_init_attr_mask {
+ MLX4DV_QP_INIT_ATTR_MASK_INL_RECV = 1 << 0,
+ MLX4DV_QP_INIT_ATTR_MASK_RESERVED = 1 << 1,
+};
+
+struct mlx4dv_qp_init_attr {
+ uint64_t comp_mask; /* Use enum mlx4dv_qp_init_attr_mask */
+ uint32_t inl_recv_sz;
+};
+
+struct ibv_qp *mlx4dv_create_qp(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr,
+ struct mlx4dv_qp_init_attr *mlx4_qp_attr);
+
+/*
+ * Direct verbs device-specific attributes
+ */
+struct mlx4dv_context {
+ uint8_t version;
+ uint32_t max_inl_recv_sz;
+ uint64_t comp_mask;
+};
+
/*
* Control segment - contains some control information for the current WQE.
*
@@ -465,5 +488,15 @@ void mlx4dv_set_data_seg(struct mlx4_wqe_data_seg *seg,
seg->lkey = htobe32(lkey);
seg->addr = htobe64(address);
}
+
+/* Most device capabilities are exported by ibv_query_device(...),
+ * but there is HW device-specific information which is important
+ * for the data path but isn't provided there.
+ *
+ * Return 0 on success.
+ */
+int mlx4dv_query_device(struct ibv_context *ctx_in,
+ struct mlx4dv_context *attrs_out);
+
#endif /* _MLX4DV_H_ */
@@ -651,9 +651,17 @@ void mlx4_calc_sq_wqe_size(struct ibv_qp_cap *cap, enum ibv_qp_type type,
}
int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
- enum ibv_qp_type type, struct mlx4_qp *qp)
+ enum ibv_qp_type type, struct mlx4_qp *qp,
+ struct mlx4dv_qp_init_attr *mlx4qp_attr)
{
+ int wqe_size;
+
qp->rq.max_gs = cap->max_recv_sge;
+ wqe_size = qp->rq.max_gs * sizeof(struct mlx4_wqe_data_seg);
+ if (mlx4qp_attr &&
+ mlx4qp_attr->comp_mask & MLX4DV_QP_INIT_ATTR_MASK_INL_RECV &&
+ mlx4qp_attr->inl_recv_sz > wqe_size)
+ wqe_size = mlx4qp_attr->inl_recv_sz;
if (qp->sq.wqe_cnt) {
qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t));
@@ -670,9 +678,11 @@ int mlx4_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_cap *cap,
}
for (qp->rq.wqe_shift = 4;
- 1 << qp->rq.wqe_shift < qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg);
+ 1 << qp->rq.wqe_shift < wqe_size;
qp->rq.wqe_shift++)
; /* nothing */
+ if (mlx4qp_attr)
+ mlx4qp_attr->inl_recv_sz = 1 << qp->rq.wqe_shift;
qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
(qp->sq.wqe_cnt << qp->sq.wqe_shift);
@@ -91,6 +91,7 @@ int mlx4_query_device_ex(struct ibv_context *context,
mctx->core_clock.offset = resp.hca_core_clock_offset;
mctx->core_clock.offset_valid = 1;
}
+ mctx->max_inl_recv_sz = resp.max_inl_recv_sz;
major = (raw_fw_ver >> 32) & 0xffff;
minor = (raw_fw_ver >> 16) & 0xffff;
@@ -781,12 +782,13 @@ enum {
MLX4_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS),
};
-struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
- struct ibv_qp_init_attr_ex *attr)
+static struct ibv_qp *create_qp_ex(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr,
+ struct mlx4dv_qp_init_attr *mlx4qp_attr)
{
struct mlx4_context *ctx = to_mctx(context);
- struct mlx4_create_qp cmd;
- struct ibv_create_qp_resp resp;
+ struct mlx4_create_qp cmd = {};
+ struct ibv_create_qp_resp resp = {};
struct mlx4_qp *qp;
int ret;
@@ -837,7 +839,8 @@ struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
attr->cap.max_recv_wr = 1;
}
- if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp))
+ if (mlx4_alloc_qp_buf(context, &attr->cap, attr->qp_type, qp,
+ mlx4qp_attr))
goto err;
mlx4_init_qp_indices(qp);
@@ -846,6 +849,15 @@ struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
pthread_spin_init(&qp->rq.lock, PTHREAD_PROCESS_PRIVATE))
goto err_free;
+ if (mlx4qp_attr) {
+ if (mlx4qp_attr->comp_mask &
+ ~(MLX4DV_QP_INIT_ATTR_MASK_RESERVED - 1)) {
+ errno = EINVAL;
+ goto err_free;
+ }
+ if (mlx4qp_attr->comp_mask & MLX4DV_QP_INIT_ATTR_MASK_INL_RECV)
+ cmd.inl_recv_sz = mlx4qp_attr->inl_recv_sz;
+ }
if (attr->cap.max_recv_sge) {
qp->db = mlx4_alloc_db(to_mctx(context), MLX4_DB_TYPE_RQ);
if (!qp->db)
@@ -864,9 +876,9 @@ struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
++cmd.log_sq_bb_count)
; /* nothing */
cmd.sq_no_prefetch = 0; /* OK for ABI 2: just a reserved field */
- memset(cmd.reserved, 0, sizeof cmd.reserved);
pthread_mutex_lock(&to_mctx(context)->qp_table_mutex);
+
if (attr->comp_mask & MLX4_CREATE_QP_EX2_COMP_MASK)
ret = mlx4_cmd_create_qp_ex(context, attr, &cmd, qp);
else
@@ -917,6 +929,19 @@ err:
return NULL;
}
+struct ibv_qp *mlx4_create_qp_ex(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr)
+{
+ return create_qp_ex(context, attr, NULL);
+}
+
+struct ibv_qp *mlx4dv_create_qp(struct ibv_context *context,
+ struct ibv_qp_init_attr_ex *attr,
+ struct mlx4dv_qp_init_attr *mlx4_qp_attr)
+{
+ return create_qp_ex(context, attr, mlx4_qp_attr);
+}
+
struct ibv_qp *mlx4_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr)
{
struct ibv_qp_init_attr_ex attr_ex;