[rdma-core,09/27] mlx5: vfio setup basic caps

Message ID 20210720081647.1980-10-yishaih@nvidia.com
State Not Applicable
Series Introduce mlx5 user space driver over VFIO

Commit Message

Yishai Hadas July 20, 2021, 8:16 a.m. UTC
Query the device's current and maximum capabilities and set the basic caps that are required to initialize the device properly: disable the cmdif checksum, set the UAR page size, enable mkey_by_name when supported, and enable RoCE on the NIC vport when the port type is Ethernet. Initialization fails with EOPNOTSUPP if the device does not support umem_uid_0.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
---
 providers/mlx5/mlx5_ifc.h  |  87 ++++++++++++++++++++-
 providers/mlx5/mlx5_vfio.c | 185 ++++++++++++++++++++++++++++++++++++++++++++-
 providers/mlx5/mlx5_vfio.h |  21 +++++
 3 files changed, 290 insertions(+), 3 deletions(-)
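
A note on the capability query flow, for orientation (not part of the patch): QUERY_HCA_CAP selects which capability group to return, and whether to return the current or the maximum values, through its op_mod field; the capability type goes in the upper bits and the cur/max selector in bit 0. A minimal sketch in C, using the enum values this patch adds to mlx5_ifc.h:

	/* Sketch only: compose the QUERY_HCA_CAP op_mod the same way
	 * mlx5_vfio_get_caps_mode() in this patch does.
	 */
	static uint16_t query_hca_cap_opmod(enum mlx5_cap_type type,
					    enum mlx5_cap_mode mode)
	{
		/* e.g. MLX5_CAP_ROCE (4) with HCA_CAP_OPMOD_GET_CUR (1) -> 0x9 */
		return (type << 1) | (mode & 0x01);
	}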

Patch

diff --git a/providers/mlx5/mlx5_ifc.h b/providers/mlx5/mlx5_ifc.h
index ac741cd..082ac1f 100644
--- a/providers/mlx5/mlx5_ifc.h
+++ b/providers/mlx5/mlx5_ifc.h
@@ -36,6 +36,7 @@ 
 #define u8 uint8_t
 
 enum mlx5_cap_mode {
+	HCA_CAP_OPMOD_GET_MAX = 0,
 	HCA_CAP_OPMOD_GET_CUR	= 1,
 };
 
@@ -46,6 +47,7 @@  enum {
 	MLX5_CMD_OP_ENABLE_HCA = 0x104,
 	MLX5_CMD_OP_QUERY_PAGES = 0x107,
 	MLX5_CMD_OP_MANAGE_PAGES = 0x108,
+	MLX5_CMD_OP_SET_HCA_CAP = 0x109,
 	MLX5_CMD_OP_QUERY_ISSI = 0x10a,
 	MLX5_CMD_OP_SET_ISSI = 0x10b,
 	MLX5_CMD_OP_CREATE_MKEY = 0x200,
@@ -61,6 +63,7 @@  enum {
 	MLX5_CMD_OP_QUERY_DCT = 0x713,
 	MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT = 0x752,
 	MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754,
+	MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT = 0x755,
 	MLX5_CMD_OP_QUERY_ROCE_ADDRESS = 0x760,
 	MLX5_CMD_OP_ACCESS_REG = 0x805,
 	MLX5_CMD_OP_QUERY_LAG = 0x842,
@@ -110,6 +113,11 @@  enum {
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 };
 
+enum {
+	MLX5_CAP_PORT_TYPE_IB  = 0x0,
+	MLX5_CAP_PORT_TYPE_ETH = 0x1,
+};
+
 struct mlx5_ifc_atomic_caps_bits {
 	u8         reserved_at_0[0x40];
 
@@ -140,7 +148,8 @@  struct mlx5_ifc_atomic_caps_bits {
 };
 
 struct mlx5_ifc_roce_cap_bits {
-	u8         reserved_0[0x5];
+	u8         reserved_0[0x4];
+	u8         sw_r_roce_src_udp_port[0x1];
 	u8         fl_rc_qp_when_roce_disabled[0x1];
 	u8         fl_rc_qp_when_roce_enabled[0x1];
 	u8         reserved_at_7[0x17];
@@ -912,7 +921,8 @@  struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         uar_4k[0x1];
 	u8         reserved_at_241[0x9];
 	u8         uar_sz[0x6];
-	u8         reserved_at_250[0x3];
+	u8         reserved_at_250[0x2];
+	u8         umem_uid_0[0x1];
 	u8         log_max_dc_cnak_qps[0x5];
 	u8         log_pg_sz[0x8];
 
@@ -1339,8 +1349,11 @@  struct mlx5_ifc_query_hca_cap_in_bits {
 };
 
 enum mlx5_cap_type {
+	MLX5_CAP_GENERAL = 0,
 	MLX5_CAP_ODP = 2,
 	MLX5_CAP_ATOMIC = 3,
+	MLX5_CAP_ROCE,
+	MLX5_CAP_NUM,
 };
 
 enum {
@@ -4346,4 +4359,74 @@  struct mlx5_ifc_access_register_in_bits {
 	u8         register_data[][0x20];
 };
 
+struct mlx5_ifc_modify_nic_vport_context_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_modify_nic_vport_field_select_bits {
+	u8         reserved_at_0[0x12];
+	u8         affiliation[0x1];
+	u8         reserved_at_13[0x1];
+	u8         disable_uc_local_lb[0x1];
+	u8         disable_mc_local_lb[0x1];
+	u8         node_guid[0x1];
+	u8         port_guid[0x1];
+	u8         min_inline[0x1];
+	u8         mtu[0x1];
+	u8         change_event[0x1];
+	u8         promisc[0x1];
+	u8         permanent_address[0x1];
+	u8         addresses_list[0x1];
+	u8         roce_en[0x1];
+	u8         reserved_at_1f[0x1];
+};
+
+struct mlx5_ifc_modify_nic_vport_context_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_vport[0x1];
+	u8         reserved_at_41[0xf];
+	u8         vport_number[0x10];
+
+	struct mlx5_ifc_modify_nic_vport_field_select_bits field_select;
+
+	u8         reserved_at_80[0x780];
+
+	struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
+};
+
+struct mlx5_ifc_set_hca_cap_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_set_hca_cap_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         other_function[0x1];
+	u8         reserved_at_41[0xf];
+	u8         function_id[0x10];
+
+	u8         reserved_at_60[0x20];
+
+	union mlx5_ifc_hca_cap_union_bits capability;
+};
+
 #endif /* MLX5_IFC_H */
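
For readers less familiar with the mlx5_ifc.h conventions: each "u8 name[0xN]" member above describes an N-bit field in the device's big-endian PRM layout, and the DEVX_SET()/DEVX_GET() helpers already used throughout the provider derive the bit offset from the structure definition and take care of the endianness. A minimal sketch, not part of the patch, touching the sw_r_roce_src_udp_port bit added above:

	uint32_t roce_caps[DEVX_ST_SZ_DW(roce_cap)] = {};

	/* set the newly exposed 1-bit field and read it back */
	DEVX_SET(roce_cap, roce_caps, sw_r_roce_src_udp_port, 1);
	assert(DEVX_GET(roce_cap, roce_caps, sw_r_roce_src_udp_port) == 1);
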
diff --git a/providers/mlx5/mlx5_vfio.c b/providers/mlx5/mlx5_vfio.c
index 4d12807..bd128c2 100644
--- a/providers/mlx5/mlx5_vfio.c
+++ b/providers/mlx5/mlx5_vfio.c
@@ -1141,6 +1141,177 @@  out:
 	return err;
 }
 
+static int mlx5_vfio_get_caps_mode(struct mlx5_vfio_context *ctx,
+				   enum mlx5_cap_type cap_type,
+				   enum mlx5_cap_mode cap_mode)
+{
+	uint8_t in[DEVX_ST_SZ_BYTES(query_hca_cap_in)] = {};
+	int out_sz = DEVX_ST_SZ_BYTES(query_hca_cap_out);
+	void *out, *hca_caps;
+	uint16_t opmod = (cap_type << 1) | (cap_mode & 0x01);
+	int err;
+
+	out = calloc(1, out_sz);
+	if (!out) {
+		errno = ENOMEM;
+		return errno;
+	}
+
+	DEVX_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	DEVX_SET(query_hca_cap_in, in, op_mod, opmod);
+	err = mlx5_vfio_cmd_exec(ctx, in, sizeof(in), out, out_sz, 0);
+	if (err)
+		goto query_ex;
+
+	hca_caps = DEVX_ADDR_OF(query_hca_cap_out, out, capability);
+
+	switch (cap_mode) {
+	case HCA_CAP_OPMOD_GET_MAX:
+		memcpy(ctx->caps.hca_max[cap_type], hca_caps,
+		       DEVX_UN_SZ_BYTES(hca_cap_union));
+		break;
+	case HCA_CAP_OPMOD_GET_CUR:
+		memcpy(ctx->caps.hca_cur[cap_type], hca_caps,
+		       DEVX_UN_SZ_BYTES(hca_cap_union));
+		break;
+	default:
+		err = EINVAL;
+		assert(false);
+		break;
+	}
+
+query_ex:
+	free(out);
+	return err;
+}
+
+enum mlx5_vport_roce_state {
+	MLX5_VPORT_ROCE_DISABLED = 0,
+	MLX5_VPORT_ROCE_ENABLED  = 1,
+};
+
+static int mlx5_vfio_nic_vport_update_roce_state(struct mlx5_vfio_context *ctx,
+						 enum mlx5_vport_roce_state state)
+{
+	uint32_t out[DEVX_ST_SZ_DW(modify_nic_vport_context_out)] = {};
+	int inlen = DEVX_ST_SZ_BYTES(modify_nic_vport_context_in);
+	void *in;
+	int err;
+
+	in = calloc(1, inlen);
+	if (!in) {
+		errno = ENOMEM;
+		return errno;
+	}
+
+	DEVX_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
+	DEVX_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
+		 state);
+	DEVX_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
+
+	err = mlx5_vfio_cmd_exec(ctx, in, inlen, out, sizeof(out), 0);
+
+	free(in);
+
+	return err;
+}
+
+static int mlx5_vfio_get_caps(struct mlx5_vfio_context *ctx, enum mlx5_cap_type cap_type)
+{
+	int ret;
+
+	ret = mlx5_vfio_get_caps_mode(ctx, cap_type, HCA_CAP_OPMOD_GET_CUR);
+	if (ret)
+		return ret;
+
+	return mlx5_vfio_get_caps_mode(ctx, cap_type, HCA_CAP_OPMOD_GET_MAX);
+}
+
+static int handle_hca_cap_roce(struct mlx5_vfio_context *ctx, void *set_ctx,
+			       int ctx_size)
+{
+	int err;
+	uint32_t out[DEVX_ST_SZ_DW(set_hca_cap_out)] = {};
+	void *set_hca_cap;
+
+	if (!MLX5_VFIO_CAP_GEN(ctx, roce))
+		return 0;
+
+	err = mlx5_vfio_get_caps(ctx, MLX5_CAP_ROCE);
+	if (err)
+		return err;
+
+	if (MLX5_VFIO_CAP_ROCE(ctx, sw_r_roce_src_udp_port) ||
+	    !MLX5_VFIO_CAP_ROCE_MAX(ctx, sw_r_roce_src_udp_port))
+		return 0;
+
+	set_hca_cap = DEVX_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+	memcpy(set_hca_cap, ctx->caps.hca_cur[MLX5_CAP_ROCE],
+	       DEVX_ST_SZ_BYTES(roce_cap));
+	DEVX_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);
+	DEVX_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	DEVX_SET(set_hca_cap_in, set_ctx, op_mod, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
+	return mlx5_vfio_cmd_exec(ctx, set_ctx, ctx_size, out, sizeof(out), 0);
+}
+
+static int handle_hca_cap(struct mlx5_vfio_context *ctx, void *set_ctx, int set_sz)
+{
+	struct mlx5_vfio_device *dev = to_mvfio_dev(ctx->vctx.context.device);
+	int sys_page_shift = ilog32(dev->page_size - 1);
+	uint32_t out[DEVX_ST_SZ_DW(set_hca_cap_out)] = {};
+	void *set_hca_cap;
+	int err;
+
+	err = mlx5_vfio_get_caps(ctx, MLX5_CAP_GENERAL);
+	if (err)
+		return err;
+
+	set_hca_cap = DEVX_ADDR_OF(set_hca_cap_in, set_ctx,
+				   capability);
+	memcpy(set_hca_cap, ctx->caps.hca_cur[MLX5_CAP_GENERAL],
+	       DEVX_ST_SZ_BYTES(cmd_hca_cap));
+
+	/* disable cmdif checksum */
+	DEVX_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
+
+	if (dev->flags & MLX5DV_VFIO_CTX_FLAGS_INIT_LINK_DOWN)
+		DEVX_SET(cmd_hca_cap, set_hca_cap, disable_link_up_by_init_hca, 1);
+
+	DEVX_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, sys_page_shift - 12);
+
+	if (MLX5_VFIO_CAP_GEN_MAX(ctx, mkey_by_name))
+		DEVX_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1);
+
+	DEVX_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+	DEVX_SET(set_hca_cap_in, set_ctx, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+
+	return mlx5_vfio_cmd_exec(ctx, set_ctx, set_sz, out, sizeof(out), 0);
+}
+
+static int set_hca_cap(struct mlx5_vfio_context *ctx)
+{
+	int set_sz = DEVX_ST_SZ_BYTES(set_hca_cap_in);
+	void *set_ctx;
+	int err;
+
+	set_ctx = calloc(1, set_sz);
+	if (!set_ctx) {
+		errno = ENOMEM;
+		return errno;
+	}
+
+	err = handle_hca_cap(ctx, set_ctx, set_sz);
+	if (err)
+		goto out;
+
+	memset(set_ctx, 0, set_sz);
+	err = handle_hca_cap_roce(ctx, set_ctx, set_sz);
+out:
+	free(set_ctx);
+	return err;
+}
+
 static int mlx5_vfio_set_hca_ctrl(struct mlx5_vfio_context *ctx)
 {
 	struct mlx5_reg_host_endianness he_in = {};
@@ -1217,6 +1388,15 @@  static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx)
 	if (err)
 		return err;
 
+	err = set_hca_cap(ctx);
+	if (err)
+		return err;
+
+	if (!MLX5_VFIO_CAP_GEN(ctx, umem_uid_0)) {
+		errno = EOPNOTSUPP;
+		return errno;
+	}
+
 	err = mlx5_vfio_satisfy_startup_pages(ctx, 0);
 	if (err)
 		return err;
@@ -1225,7 +1405,10 @@  static int mlx5_vfio_setup_function(struct mlx5_vfio_context *ctx)
 	if (err)
 		return err;
 
-	return 0;
+	if (MLX5_VFIO_CAP_GEN(ctx, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+		err = mlx5_vfio_nic_vport_update_roce_state(ctx, MLX5_VPORT_ROCE_ENABLED);
+
+	return err;
 }
 
 static void mlx5_vfio_uninit_context(struct mlx5_vfio_context *ctx)
diff --git a/providers/mlx5/mlx5_vfio.h b/providers/mlx5/mlx5_vfio.h
index 36b1f40..225c1b9 100644
--- a/providers/mlx5/mlx5_vfio.h
+++ b/providers/mlx5/mlx5_vfio.h
@@ -12,6 +12,7 @@ 
 
 #include <infiniband/driver.h>
 #include <util/interval_set.h>
+#include "mlx5_ifc.h"
 
 #define FW_INIT_WAIT_MS 2
 #define FW_PRE_INIT_TIMEOUT_MILI 120000
@@ -43,6 +44,22 @@  struct mlx5_vfio_device {
 #error Host endianness not defined
 #endif
 
+/* GET Dev Caps macros */
+#define MLX5_VFIO_CAP_GEN(ctx, cap) \
+	DEVX_GET(cmd_hca_cap, ctx->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+
+#define MLX5_VFIO_CAP_GEN_64(mdev, cap) \
+	DEVX_GET64(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap)
+
+#define MLX5_VFIO_CAP_GEN_MAX(ctx, cap) \
+	DEVX_GET(cmd_hca_cap, ctx->caps.hca_max[MLX5_CAP_GENERAL], cap)
+
+#define MLX5_VFIO_CAP_ROCE(ctx, cap) \
+	DEVX_GET(roce_cap, ctx->caps.hca_cur[MLX5_CAP_ROCE], cap)
+
+#define MLX5_VFIO_CAP_ROCE_MAX(ctx, cap) \
+	DEVX_GET(roce_cap, ctx->caps.hca_max[MLX5_CAP_ROCE], cap)
+
 struct mlx5_reg_host_endianness {
 	uint8_t he;
 	uint8_t rsvd[15];
@@ -162,6 +179,10 @@  struct mlx5_vfio_context {
 	size_t bar_map_size;
 	struct mlx5_vfio_cmd cmd;
 	bool have_eq;
+	struct {
+		uint32_t hca_cur[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)];
+		uint32_t hca_max[MLX5_CAP_NUM][DEVX_UN_SZ_DW(hca_cap_union)];
+	} caps;
 };
 
 static inline struct mlx5_vfio_device *to_mvfio_dev(struct ibv_device *ibdev)
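
Closing note on the caps cache added in mlx5_vfio.h (not part of the patch): hca_cur holds the currently enabled capabilities and hca_max what the device is able to support, following the kernel driver's convention, so an optional feature is turned on only when the max caps allow it and the current caps do not already have it. That is the test handle_hca_cap_roce() performs; as a sketch:

	/* enable the capability only if the device can support it (max)
	 * and it is not already enabled (cur)
	 */
	if (!MLX5_VFIO_CAP_ROCE(ctx, sw_r_roce_src_udp_port) &&
	    MLX5_VFIO_CAP_ROCE_MAX(ctx, sw_r_roce_src_udp_port))
		DEVX_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);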