--- a/src/mlx5-abi.h
+++ b/src/mlx5-abi.h
@@ -55,7 +55,11 @@ struct mlx5_alloc_ucontext {
__u32 total_num_uuars;
__u32 num_low_latency_uuars;
__u32 flags;
- __u32 reserved;
+ __u32 comp_mask;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
};

struct mlx5_alloc_ucontext_resp {
@@ -72,6 +76,10 @@ struct mlx5_alloc_ucontext_resp {
__u16 num_ports;
__u8 cqe_version;
__u8 reserved;
+ __u32 comp_mask;
+ __u32 response_length;
+ __u32 reserved2;
+ __u64 hca_core_clock_offset;
};

struct mlx5_alloc_pd_resp {
--- a/src/mlx5.c
+++ b/src/mlx5.c
@@ -524,6 +524,30 @@ static int single_threaded_app(void)
return 0;
}

+static int mlx5_map_internal_clock(struct mlx5_device *mdev,
+ struct ibv_context *ibv_ctx)
+{
+ struct mlx5_context *context = to_mctx(ibv_ctx);
+ void *hca_clock_page;
+ off_t offset = 0;
+
+ set_command(MLX5_MMAP_GET_CORE_CLOCK_CMD, &offset);
+ hca_clock_page = mmap(NULL, mdev->page_size,
+ PROT_READ, MAP_SHARED, ibv_ctx->cmd_fd,
+ mdev->page_size * offset);
+
+ if (hca_clock_page == MAP_FAILED) {
+ fprintf(stderr, PFX
+ "Warning: Timestamp available,\n"
+ "but failed to mmap() hca core clock page.\n");
+ return -1;
+ }
+
+ context->hca_core_clock = hca_clock_page +
+ (context->core_clock.offset & (mdev->page_size - 1));
+ return 0;
+}
+
static int mlx5_init_context(struct verbs_device *vdev,
struct ibv_context *ctx, int cmd_fd)
{
@@ -647,6 +671,15 @@ static int mlx5_init_context(struct verbs_device *vdev,
context->bfs[j].uuarn = j;
}

+ context->hca_core_clock = NULL;
+ if (resp.response_length + sizeof(resp.ibv_resp) >=
+ offsetof(struct mlx5_alloc_ucontext_resp, hca_core_clock_offset) +
+ sizeof(resp.hca_core_clock_offset) &&
+ resp.comp_mask & MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET) {
+ context->core_clock.offset = resp.hca_core_clock_offset;
+ mlx5_map_internal_clock(mdev, ctx);
+ }
+
mlx5_spinlock_init(&context->lock32);
context->prefer_bf = get_always_bf();
@@ -664,6 +697,7 @@ static int mlx5_init_context(struct verbs_device *vdev,
verbs_set_ctx_op(v_ctx, create_srq_ex, mlx5_create_srq_ex);
verbs_set_ctx_op(v_ctx, get_srq_num, mlx5_get_srq_num);
verbs_set_ctx_op(v_ctx, query_device_ex, mlx5_query_device_ex);
+ verbs_set_ctx_op(v_ctx, query_values, mlx5_query_values);
verbs_set_ctx_op(v_ctx, create_cq_ex, mlx5_create_cq_ex);
if (context->cqe_version && context->cqe_version == 1)
verbs_set_ctx_op(v_ctx, poll_cq_ex, mlx5_poll_cq_v1_ex);
@@ -697,6 +731,9 @@ static void mlx5_cleanup_context(struct verbs_device *device,
if (context->uar[i])
munmap(context->uar[i], page_size);
}
+ if (context->hca_core_clock)
+ munmap(context->hca_core_clock - context->core_clock.offset,
+ page_size);
close_debug_file(context);
}
--- a/src/mlx5.h
+++ b/src/mlx5.h
@@ -117,7 +117,8 @@ enum {

enum {
MLX5_MMAP_GET_REGULAR_PAGES_CMD = 0,
- MLX5_MMAP_GET_CONTIGUOUS_PAGES_CMD = 1
+ MLX5_MMAP_GET_CONTIGUOUS_PAGES_CMD = 1,
+ MLX5_MMAP_GET_CORE_CLOCK_CMD = 5
};

#define MLX5_CQ_PREFIX "MLX_CQ"
@@ -307,6 +308,11 @@ struct mlx5_context {
struct mlx5_spinlock hugetlb_lock;
struct list_head hugetlb_list;
uint8_t cqe_version;
+ struct {
+ uint64_t offset;
+ uint64_t mask;
+ } core_clock;
+ void *hca_core_clock;
};

struct mlx5_bitmap {
@@ -585,6 +591,8 @@ int mlx5_query_device_ex(struct ibv_context *context,
const struct ibv_query_device_ex_input *input,
struct ibv_device_attr_ex *attr,
size_t attr_size);
+int mlx5_query_values(struct ibv_context *context,
+ struct ibv_values_ex *values);
struct ibv_qp *mlx5_create_qp_ex(struct ibv_context *context,
struct ibv_qp_init_attr_ex *attr);
int mlx5_query_port(struct ibv_context *context, uint8_t port,
--- a/src/verbs.c
+++ b/src/verbs.c
@@ -79,6 +79,52 @@ int mlx5_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
return 0;
}

+#define READL(ptr) (*((uint32_t *)(ptr)))
+static int mlx5_read_clock(struct ibv_context *context, uint64_t *cycles)
+{
+ unsigned int clockhi, clocklo, clockhi1;
+ int i;
+ struct mlx5_context *ctx = to_mctx(context);
+
+ if (!ctx->hca_core_clock)
+ return -EOPNOTSUPP;
+
+ /* Handle wraparound */
+ for (i = 0; i < 2; i++) {
+ clockhi = ntohl(READL(ctx->hca_core_clock));
+ clocklo = ntohl(READL(ctx->hca_core_clock + 4));
+ clockhi1 = ntohl(READL(ctx->hca_core_clock));
+ if (clockhi == clockhi1)
+ break;
+ }
+
+ *cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo;
+
+ return 0;
+}
+
+int mlx5_query_values(struct ibv_context *context,
+ struct ibv_values_ex *values)
+{
+ uint32_t comp_mask = 0;
+ int err = 0;
+
+ if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) {
+ uint64_t cycles;
+
+ err = mlx5_read_clock(context, &cycles);
+ if (!err) {
+ values->raw_clock.tv_sec = 0;
+ values->raw_clock.tv_nsec = cycles;
+ comp_mask |= IBV_VALUES_MASK_RAW_CLOCK;
+ }
+ }
+
+ values->comp_mask = comp_mask;
+
+ return err;
+}
+
int mlx5_query_port(struct ibv_context *context, uint8_t port,
struct ibv_port_attr *attr)
{
In order to query the current HCA's core clock, libmlx5 should support
the ibv_query_values verb. The hardware's cycles register is queried by
mmap()ing it into user space, so libmlx5 maps the register when the
context is initialized. This assumes the machine's architecture places
PCI and memory in the same address space. The page offset is passed
through the init_context vendor-specific data.

Signed-off-by: Matan Barak <matanb@mellanox.com>
---
 src/mlx5-abi.h | 10 +++++++++-
 src/mlx5.c     | 37 +++++++++++++++++++++++++++++++++++++
 src/mlx5.h     | 10 +++++++++-
 src/verbs.c    | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 101 insertions(+), 2 deletions(-)
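For reference, below is a minimal sketch of how an application could
consume this verb once the matching libibverbs extension is in place. It
assumes the verb is exposed as ibv_query_rt_values_ex() with the
IBV_VALUES_MASK_RAW_CLOCK flag (the names eventually used by rdma-core);
the entry point in this series may be named differently, and error
handling is abbreviated.

/*
 * Sketch only: read the HCA's free-running cycle counter through the
 * extended verbs API. Assumes ibv_query_rt_values_ex() and
 * IBV_VALUES_MASK_RAW_CLOCK from the libibverbs side of this series;
 * on mlx5 the call lands in mlx5_query_values() above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **dev_list = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_values_ex values = {
		/* Ask only for the raw cycle counter. */
		.comp_mask = IBV_VALUES_MASK_RAW_CLOCK,
	};

	if (!dev_list || !dev_list[0])
		return EXIT_FAILURE;

	ctx = ibv_open_device(dev_list[0]);
	if (!ctx)
		return EXIT_FAILURE;

	if (!ibv_query_rt_values_ex(ctx, &values) &&
	    (values.comp_mask & IBV_VALUES_MASK_RAW_CLOCK))
		/* The provider returns raw cycles in raw_clock.tv_nsec. */
		printf("raw HCA cycles: %lld\n",
		       (long long)values.raw_clock.tv_nsec);

	ibv_close_device(ctx);
	ibv_free_device_list(dev_list);
	return EXIT_SUCCESS;
}

To turn cycles into wall-clock time, the application additionally needs
the clock frequency, which ibv_query_device_ex() reports in the
hca_core_clock field of struct ibv_device_attr_ex (in kHz).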