@@ -36,6 +36,7 @@
#define u8 uint8_t
+/*
+ * Selects which capability view a QUERY_HCA_CAP command asks for. The value
+ * is placed in the lowest bit of the command's op_mod field.
+ */
enum mlx5_cap_mode {
+ HCA_CAP_OPMOD_GET_MAX = 0, /* reply is cached in ctx->caps.hca_max by mlx5_vfio_get_caps_mode() */
HCA_CAP_OPMOD_GET_CUR = 1,
};
@@ -46,6 +47,7 @@ enum {
MLX5_CMD_OP_ENABLE_HCA = 0x104,
MLX5_CMD_OP_QUERY_PAGES = 0x107,
MLX5_CMD_OP_MANAGE_PAGES = 0x108,
+ MLX5_CMD_OP_SET_HCA_CAP = 0x109,
MLX5_CMD_OP_QUERY_ISSI = 0x10a,
MLX5_CMD_OP_SET_ISSI = 0x10b,
MLX5_CMD_OP_CREATE_MKEY = 0x200,
@@ -61,6 +63,7 @@ enum {
MLX5_CMD_OP_QUERY_DCT = 0x713,
MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754,
+ MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755,
MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760,
MLX5_CMD_OP_ACCESS_REG = 0x805,
MLX5_CMD_OP_QUERY_LAG = 0x842,
@@ -110,6 +113,11 @@ enum {
MLX5_REG_HOST_ENDIANNESS = 0x7004,
};
+/*
+ * Values compared against the port_type field of the general HCA
+ * capabilities (cmd_hca_cap).
+ */
+enum {
+ MLX5_CAP_PORT_TYPE_IB = 0x0,
+ MLX5_CAP_PORT_TYPE_ETH = 0x1, /* mlx5_vfio_setup_function() enables RoCE on the NIC vport for this type */
+};
+
struct mlx5_ifc_atomic_caps_bits {
u8 reserved_at_0[0x40];
@@ -140,7 +148,8 @@ struct mlx5_ifc_atomic_caps_bits {
};
struct mlx5_ifc_roce_cap_bits {
- u8 reserved_0[0x5];
+ u8 reserved_0[0x4];
+ u8 sw_r_roce_src_udp_port[0x1];
u8 fl_rc_qp_when_roce_disabled[0x1];
u8 fl_rc_qp_when_roce_enabled[0x1];
u8 reserved_at_7[0x17];
@@ -912,7 +921,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 uar_4k[0x1];
u8 reserved_at_241[0x9];
u8 uar_sz[0x6];
- u8 reserved_at_250[0x3];
+ u8 reserved_at_250[0x2];
+ u8 umem_uid_0[0x1];
u8 log_max_dc_cnak_qps[0x5];
u8 log_pg_sz[0x8];
@@ -1339,8 +1349,11 @@ struct mlx5_ifc_query_hca_cap_in_bits {
};
+/*
+ * Capability groups. The value is shifted left by one bit to form the upper
+ * part of the QUERY_HCA_CAP op_mod, and it is also the first index into the
+ * cached ctx->caps.hca_cur[] / ctx->caps.hca_max[] arrays.
+ */
enum mlx5_cap_type {
+ MLX5_CAP_GENERAL = 0,
MLX5_CAP_ODP = 2,
MLX5_CAP_ATOMIC = 3,
+ MLX5_CAP_ROCE, /* no initializer: takes the value after MLX5_CAP_ATOMIC, i.e. 4 */
+ MLX5_CAP_NUM, /* one past the last group (5); sizes the cached capability arrays */
};
enum {
@@ -4346,4 +4359,74 @@ struct mlx5_ifc_access_register_in_bits {
u8 register_data[][0x20];
};
+/*
+ * Output layout of the MODIFY_NIC_VPORT_CONTEXT command; it is used only to
+ * size the reply buffer. As the reserved_at_N names show, each array length
+ * is a field width in bits and N is the field's bit offset from the start of
+ * the structure (total here: 0x80 bits).
+ */
+struct mlx5_ifc_modify_nic_vport_context_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+/*
+ * 0x20-bit field_select word embedded in modify_nic_vport_context_in.
+ * Array lengths are widths in bits; the single-bit members below are flags.
+ * NOTE(review): presumably each flag tells the device which member of
+ * nic_vport_context to apply - the only user in this patch sets roce_en here
+ * together with nic_vport_context.roce_en. Confirm against the device PRM.
+ */
+struct mlx5_ifc_modify_nic_vport_field_select_bits {
+ u8 reserved_at_0[0x12];
+ u8 affiliation[0x1];
+ u8 reserved_at_13[0x1];
+ u8 disable_uc_local_lb[0x1];
+ u8 disable_mc_local_lb[0x1];
+ u8 node_guid[0x1];
+ u8 port_guid[0x1];
+ u8 min_inline[0x1];
+ u8 mtu[0x1];
+ u8 change_event[0x1];
+ u8 promisc[0x1];
+ u8 permanent_address[0x1];
+ u8 addresses_list[0x1];
+ u8 roce_en[0x1];
+ u8 reserved_at_1f[0x1];
+};
+
+/*
+ * Input layout of the MODIFY_NIC_VPORT_CONTEXT command. Array lengths are
+ * field widths in bits: the header fields occupy bits 0x0-0x5f, field_select
+ * sits at bit offset 0x60, and the 0x780 reserved bits starting at 0x80 place
+ * nic_vport_context at bit offset 0x800.
+ */
+struct mlx5_ifc_modify_nic_vport_context_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 other_vport[0x1];
+ u8 reserved_at_41[0xf];
+ u8 vport_number[0x10];
+
+ struct mlx5_ifc_modify_nic_vport_field_select_bits field_select;
+
+ u8 reserved_at_80[0x780];
+
+ struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
+};
+
+/*
+ * Output layout of the SET_HCA_CAP command; it is used only to size the reply
+ * buffer. Array lengths are field widths in bits and the reserved_at_N names
+ * give bit offsets (total: 0x80 bits).
+ */
+struct mlx5_ifc_set_hca_cap_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
+/*
+ * Input layout of the SET_HCA_CAP command. Array lengths are field widths in
+ * bits; the header occupies bits 0x0-0x7f and the capability union that is
+ * written to the device starts at bit offset 0x80.
+ */
+struct mlx5_ifc_set_hca_cap_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 other_function[0x1];
+ u8 reserved_at_41[0xf];
+ u8 function_id[0x10];
+
+ u8 reserved_at_60[0x20];
+
+ union mlx5_ifc_hca_cap_union_bits capability;
+};
+
#endif /* MLX5_IFC_H */
@@ -1141,6 +1141,177 @@ out:
return err;
}
+/*
+ * Query one capability group from the device and cache the reply in ctx.
+ *
+ * Issues QUERY_HCA_CAP with op_mod = (cap_type << 1) | (cap_mode & 1). On
+ * success the returned capability union is copied into
+ * ctx->caps.hca_max[cap_type] or ctx->caps.hca_cur[cap_type], depending on
+ * cap_mode.
+ *
+ * Returns 0 on success, ENOMEM (also stored in errno) if the reply buffer
+ * cannot be allocated, the mlx5_vfio_cmd_exec() result if the command fails,
+ * or EINVAL for a cap_mode that is neither GET_MAX nor GET_CUR.
+ */
+static int mlx5_vfio_get_caps_mode(struct mlx5_vfio_context *ctx,
+				   enum mlx5_cap_type cap_type,
+				   enum mlx5_cap_mode cap_mode)
+{
+	uint16_t query_opmod = (cap_type << 1) | (cap_mode & 0x01);
+	uint8_t cmd_in[DEVX_ST_SZ_BYTES(query_hca_cap_in)] = {};
+	int reply_sz = DEVX_ST_SZ_BYTES(query_hca_cap_out);
+	void *reply, *fw_caps;
+	int err;
+
+	reply = calloc(1, reply_sz);
+	if (!reply) {
+		errno = ENOMEM;
+		return errno;
+	}
+
+	DEVX_SET(query_hca_cap_in, cmd_in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	DEVX_SET(query_hca_cap_in, cmd_in, op_mod, query_opmod);
+
+	err = mlx5_vfio_cmd_exec(ctx, cmd_in, sizeof(cmd_in), reply, reply_sz, 0);
+	if (!err) {
+		fw_caps = DEVX_ADDR_OF(query_hca_cap_out, reply, capability);
+
+		if (cap_mode == HCA_CAP_OPMOD_GET_MAX) {
+			memcpy(ctx->caps.hca_max[cap_type], fw_caps,
+			       DEVX_UN_SZ_BYTES(hca_cap_union));
+		} else if (cap_mode == HCA_CAP_OPMOD_GET_CUR) {
+			memcpy(ctx->caps.hca_cur[cap_type], fw_caps,
+			       DEVX_UN_SZ_BYTES(hca_cap_union));
+		} else {
+			/* Unknown mode: report it, and trap in debug builds. */
+			err = EINVAL;
+			assert(false);
+		}
+	}
+
+	free(reply);
+	return err;
+}
+
+/*
+ * Value written to nic_vport_context.roce_en by
+ * mlx5_vfio_nic_vport_update_roce_state().
+ */
+enum mlx5_vport_roce_state {
+ MLX5_VPORT_ROCE_DISABLED = 0,
+ MLX5_VPORT_ROCE_ENABLED = 1,
+};
+
+/*
+ * Write the roce_en field of the NIC vport context.
+ *
+ * Builds a MODIFY_NIC_VPORT_CONTEXT command with field_select.roce_en set and
+ * nic_vport_context.roce_en equal to state, then executes it.
+ *
+ * Returns 0 on success, ENOMEM (also stored in errno) if the command buffer
+ * cannot be allocated, otherwise the mlx5_vfio_cmd_exec() result.
+ */
+static int mlx5_vfio_nic_vport_update_roce_state(struct mlx5_vfio_context *ctx,
+						 enum mlx5_vport_roce_state state)
+{
+	int cmd_sz = DEVX_ST_SZ_BYTES(modify_nic_vport_context_in);
+	uint32_t reply[DEVX_ST_SZ_DW(modify_nic_vport_context_out)] = {};
+	void *cmd;
+	int ret;
+
+	cmd = calloc(1, cmd_sz);
+	if (!cmd) {
+		errno = ENOMEM;
+		return errno;
+	}
+
+	DEVX_SET(modify_nic_vport_context_in, cmd, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+	DEVX_SET(modify_nic_vport_context_in, cmd, field_select.roce_en, 1);
+	DEVX_SET(modify_nic_vport_context_in, cmd, nic_vport_context.roce_en,
+		 state);
+
+	ret = mlx5_vfio_cmd_exec(ctx, cmd, cmd_sz, reply, sizeof(reply), 0);
+	free(cmd);
+
+	return ret;
+}
+
+/*
+ * Refresh both cached views of one capability group: the current values
+ * first, then the maximum values. Stops at the first failing query and
+ * returns its error code; returns 0 when both queries succeed.
+ */
+static int mlx5_vfio_get_caps(struct mlx5_vfio_context *ctx, enum mlx5_cap_type cap_type)
+{
+	int err = mlx5_vfio_get_caps_mode(ctx, cap_type, HCA_CAP_OPMOD_GET_CUR);
+
+	if (!err)
+		err = mlx5_vfio_get_caps_mode(ctx, cap_type, HCA_CAP_OPMOD_GET_MAX);
+
+	return err;
+}
+
+/*
+ * Turn on the sw_r_roce_src_udp_port RoCE capability when the device offers
+ * it and it is not already on.
+ *
+ * Reads the roce bit from the general capabilities already cached in ctx and
+ * returns 0 without touching the device when it is clear. Otherwise the RoCE
+ * capabilities are queried; if the bit is already set in the current view, or
+ * absent from the maximum view, nothing is written and 0 is returned.
+ *
+ * set_ctx is a caller-owned set_hca_cap_in buffer of ctx_size bytes. Nothing
+ * here clears it, so the caller should pass it zeroed.
+ *
+ * Returns 0 on success or the error code of the failing query/command.
+ */
+static int handle_hca_cap_roce(struct mlx5_vfio_context *ctx, void *set_ctx,
+			       int ctx_size)
+{
+	uint32_t reply[DEVX_ST_SZ_DW(set_hca_cap_out)] = {};
+	void *roce_caps;
+	int ret;
+
+	if (!MLX5_VFIO_CAP_GEN(ctx, roce))
+		return 0;
+
+	ret = mlx5_vfio_get_caps(ctx, MLX5_CAP_ROCE);
+	if (ret)
+		return ret;
+
+	/* Already enabled: nothing to change. */
+	if (MLX5_VFIO_CAP_ROCE(ctx, sw_r_roce_src_udp_port))
+		return 0;
+
+	/* Not present in the maximum view: cannot be enabled. */
+	if (!MLX5_VFIO_CAP_ROCE_MAX(ctx, sw_r_roce_src_udp_port))
+		return 0;
+
+	DEVX_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	DEVX_SET(set_hca_cap_in, set_ctx, op_mod, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
+
+	/* Start from the current RoCE caps and flip only the one bit. */
+	roce_caps = DEVX_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+	memcpy(roce_caps, ctx->caps.hca_cur[MLX5_CAP_ROCE],
+	       DEVX_ST_SZ_BYTES(roce_cap));
+	DEVX_SET(roce_cap, roce_caps, sw_r_roce_src_udp_port, 1);
+
+	return mlx5_vfio_cmd_exec(ctx, set_ctx, ctx_size, reply, sizeof(reply), 0);
+}
+
+/*
+ * Program the general device capabilities.
+ *
+ * Queries the general capabilities (current and maximum), copies the current
+ * values into the capability area of set_ctx, applies the adjustments below
+ * and issues SET_HCA_CAP for the general-device group:
+ *   - cmdif_checksum is cleared;
+ *   - disable_link_up_by_init_hca is set when the device was opened with
+ *     MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN;
+ *   - log_uar_page_sz is set to ilog32(page_size - 1) - 12;
+ *   - mkey_by_name is set when the maximum view reports it.
+ *
+ * set_ctx is a caller-owned set_hca_cap_in buffer of set_sz bytes.
+ * Returns 0 on success or the error code of the failing query/command.
+ */
+static int handle_hca_cap(struct mlx5_vfio_context *ctx, void *set_ctx, int set_sz)
+{
+	struct mlx5_vfio_device *vdev = to_mvfio_dev(ctx->vctx.context.device);
+	uint32_t reply[DEVX_ST_SZ_DW(set_hca_cap_out)] = {};
+	int page_shift = ilog32(vdev->page_size - 1);
+	void *gen_caps;
+	int ret;
+
+	ret = mlx5_vfio_get_caps(ctx, MLX5_CAP_GENERAL);
+	if (ret)
+		return ret;
+
+	DEVX_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	DEVX_SET(set_hca_cap_in, set_ctx, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+
+	/* Start from the current capabilities, then override selected fields. */
+	gen_caps = DEVX_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+	memcpy(gen_caps, ctx->caps.hca_cur[MLX5_CAP_GENERAL],
+	       DEVX_ST_SZ_BYTES(cmd_hca_cap));
+
+	/* disable cmdif checksum */
+	DEVX_SET(cmd_hca_cap, gen_caps, cmdif_checksum, 0);
+
+	DEVX_SET(cmd_hca_cap, gen_caps, log_uar_page_sz, page_shift - 12);
+
+	if (MLX5_VFIO_CAP_GEN_MAX(ctx, mkey_by_name))
+		DEVX_SET(cmd_hca_cap, gen_caps, mkey_by_name, 1);
+
+	if (vdev->flags & MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN)
+		DEVX_SET(cmd_hca_cap, gen_caps, disable_link_up_by_init_hca, 1);
+
+	return mlx5_vfio_cmd_exec(ctx, set_ctx, set_sz, reply, sizeof(reply), 0);
+}
+
+/*
+ * Apply the capability settings needed at setup time: the general device
+ * capabilities first, then (only if that succeeded) the RoCE capabilities.
+ *
+ * One zero-initialised set_hca_cap_in buffer is shared by both steps and is
+ * cleared again between them.
+ *
+ * Returns 0 on success, ENOMEM (also stored in errno) if the buffer cannot be
+ * allocated, otherwise the error code of the failing step.
+ */
+static int set_hca_cap(struct mlx5_vfio_context *ctx)
+{
+	int buf_sz = DEVX_ST_SZ_BYTES(set_hca_cap_in);
+	void *cmd_buf;
+	int ret;
+
+	cmd_buf = calloc(1, buf_sz);
+	if (!cmd_buf) {
+		errno = ENOMEM;
+		return errno;
+	}
+
+	ret = handle_hca_cap(ctx, cmd_buf, buf_sz);
+	if (!ret) {
+		/* Reuse the buffer for the RoCE step; it expects a clean one. */
+		memset(cmd_buf, 0, buf_sz);
+		ret = handle_hca_cap_roce(ctx, cmd_buf, buf_sz);
+	}
+
+	free(cmd_buf);
+	return ret;
+}
+
static int mlx5_vfio_set_hca_ctrl(struct mlx5_vfio_context *ctx)
{
struct mlx5_reg_host_endianness he_in = {};
@@ -1217,6 +1388,15 @@ static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx)
if (err)
return err;
+ err = set_hca_cap(ctx);
+ if (err)
+ return err;
+
+ if (!MLX5_VFIO_CAP_GEN(ctx, umem_uid_0)) {
+ errno = EOPNOTSUPP;
+ return errno;
+ }
+
err = mlx5_vfio_satisfy_startup_pages(ctx, 0);
if (err)
return err;
@@ -1225,7 +1405,10 @@ static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx)
if (err)
return err;
- return 0;
+ if (MLX5_VFIO_CAP_GEN(ctx, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+ err = mlx5_vfio_nic_vport_update_roce_state(ctx, MLX5_VPORT_ROCE_ENABLED);
+
+ return err;
}
static void mlx5_vfio_uninit_context(struct mlx5_vfio_context *ctx)
@@ -12,6 +12,7 @@
#include <infiniband/driver.h>
#include <util/interval_set.h>
+#include "mlx5_ifc.h"
#define FW_INIT_WAIT_MS 2
#define FW_PRE_INIT_TIMEOUT_MILI 120000
@@ -43,6 +44,22 @@ struct mlx5_vfio_device {
#error Host endianness not defined
#endif
+/*
+ * GET Dev Caps macros
+ *
+ * Read a single field from the capabilities cached in
+ * struct mlx5_vfio_context (ctx->caps):
+ *   MLX5_VFIO_CAP_GEN      - general caps, current view
+ *   MLX5_VFIO_CAP_GEN_64   - general caps, current view, read via DEVX_GET64
+ *   MLX5_VFIO_CAP_GEN_MAX  - general caps, maximum view
+ *   MLX5_VFIO_CAP_ROCE     - RoCE caps, current view
+ *   MLX5_VFIO_CAP_ROCE_MAX - RoCE caps, maximum view
+ *
+ * ctx is a pointer to struct mlx5_vfio_context and cap is a field name of the
+ * corresponding mlx5_ifc_*_bits layout. The ctx argument is parenthesised in
+ * each expansion so that any pointer expression, not only a plain identifier,
+ * expands correctly.
+ */
+#define MLX5_VFIO_CAP_GEN(ctx, cap) \
+	DEVX_GET(cmd_hca_cap, (ctx)->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+
+#define MLX5_VFIO_CAP_GEN_64(ctx, cap) \
+	DEVX_GET64(cmd_hca_cap, (ctx)->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+
+#define MLX5_VFIO_CAP_GEN_MAX(ctx, cap) \
+	DEVX_GET(cmd_hca_cap, (ctx)->caps.hca_max[MLX5_CAP_GENERAL], cap)
+
+#define MLX5_VFIO_CAP_ROCE(ctx, cap) \
+	DEVX_GET(roce_cap, (ctx)->caps.hca_cur[MLX5_CAP_ROCE], cap)
+
+#define MLX5_VFIO_CAP_ROCE_MAX(ctx, cap) \
+	DEVX_GET(roce_cap, (ctx)->caps.hca_max[MLX5_CAP_ROCE], cap)
+
struct mlx5_reg_host_endianness {
uint8_t he;
uint8_t rsvd[15];
@@ -162,6 +179,10 @@ struct mlx5_vfio_context {
size_t bar_map_size;
struct mlx5_vfio_cmd cmd;
bool have_eq;
+ struct {
+ uint32_t hca_cur[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)];
+ uint32_t hca_max[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)];
+ } caps;
};
static inline struct mlx5_vfio_device *to_mvfio_dev(struct ibv_device *ibdev)
Set basic caps that are required to initialize the device properly.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 providers/mlx5/mlx5_ifc.h  |  87 ++++++++++++++++++++-
 providers/mlx5/mlx5_vfio.c | 185 ++++++++++++++++++++++++++++++++++++++++++++-
 providers/mlx5/mlx5_vfio.h |  21 +++++
 3 files changed, 290 insertions(+), 3 deletions(-)