@@ -12,6 +12,8 @@
#include <linux/pci.h>
#include <linux/slab.h>
+#include <trace/events/habanalabs_cn.h>
+
#define NIC_MIN_WQS_PER_PORT 2
#define NIC_SEQ_RESETS_TIMEOUT_MS 15000 /* 15 seconds */
@@ -5892,8 +5894,15 @@ void *__hbl_cn_dma_alloc_coherent(struct hbl_cn_device *hdev, size_t size, dma_a
gfp_t flag, const char *caller)
{
const struct hbl_cn_asic_funcs *asic_funcs = hdev->asic_funcs;
+ void *ptr;
+
+ ptr = asic_funcs->dma_alloc_coherent(hdev, size, dma_handle, flag);
- return asic_funcs->dma_alloc_coherent(hdev, size, dma_handle, flag);
+ /*
+  * Trace the outcome of the ASIC-specific allocation. A failed allocation
+  * (NULL) may leave *dma_handle unwritten, so report 0 instead of reading it.
+  */
+ if (trace_habanalabs_cn_dma_alloc_coherent_enabled())
+ trace_habanalabs_cn_dma_alloc_coherent(hdev->dev, (u64)(uintptr_t)ptr,
+ ptr ? *dma_handle : 0, size, caller);
+
+ return ptr;
}
void __hbl_cn_dma_free_coherent(struct hbl_cn_device *hdev, size_t size, void *cpu_addr,
@@ -5902,14 +5911,25 @@ void __hbl_cn_dma_free_coherent(struct hbl_cn_device *hdev, size_t size, void *c
const struct hbl_cn_asic_funcs *asic_funcs = hdev->asic_funcs;
asic_funcs->dma_free_coherent(hdev, size, cpu_addr, dma_addr);
+
+ if (trace_habanalabs_cn_dma_free_coherent_enabled())
+ trace_habanalabs_cn_dma_free_coherent(hdev->dev, (u64)(uintptr_t)cpu_addr, dma_addr,
+ size, caller);
}
+/*
+ * Allocate through the ASIC-specific dma_pool_zalloc() hook and, when the
+ * tracepoint is enabled, record the CPU address, DMA address, size and caller.
+ * Returns whatever the hook returned.
+ */
void *__hbl_cn_dma_pool_zalloc(struct hbl_cn_device *hdev, size_t size, gfp_t mem_flags,
dma_addr_t *dma_handle, const char *caller)
{
const struct hbl_cn_asic_funcs *asic_funcs = hdev->asic_funcs;
+ void *ptr;
- return asic_funcs->dma_pool_zalloc(hdev, size, mem_flags, dma_handle);
+ ptr = asic_funcs->dma_pool_zalloc(hdev, size, mem_flags, dma_handle);
+
+ /* A failed allocation may leave *dma_handle unwritten; report 0 in that case */
+ if (trace_habanalabs_cn_dma_pool_zalloc_enabled())
+ trace_habanalabs_cn_dma_pool_zalloc(hdev->dev, (u64)(uintptr_t)ptr,
+ ptr ? *dma_handle : 0, size, caller);
+
+ return ptr;
}
void __hbl_cn_dma_pool_free(struct hbl_cn_device *hdev, void *vaddr, dma_addr_t dma_addr,
@@ -5918,6 +5938,10 @@ void __hbl_cn_dma_pool_free(struct hbl_cn_device *hdev, void *vaddr, dma_addr_t
const struct hbl_cn_asic_funcs *asic_funcs = hdev->asic_funcs;
asic_funcs->dma_pool_free(hdev, vaddr, dma_addr);
+
+ if (trace_habanalabs_cn_dma_pool_free_enabled())
+ trace_habanalabs_cn_dma_pool_free(hdev->dev, (u64)(uintptr_t)vaddr, dma_addr, 0,
+ caller);
}
int hbl_cn_get_reg_pcie_addr(struct hbl_cn_device *hdev, u8 bar_id, u32 reg, u64 *pci_addr)
@@ -12,6 +12,9 @@
#include <linux/auxiliary_bus.h>
#include <linux/sched/clock.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/habanalabs_cn.h>
+
#define HBL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
#define HBL_DRIVER_DESC "HabanaLabs AI accelerators Core Network driver"
@@ -6,6 +6,7 @@
#include <linux/vmalloc.h>
#include "hbl_cn.h"
+#include <trace/events/habanalabs_cn.h>
static int hbl_cn_map_vmalloc_range(struct hbl_cn_ctx *ctx, u64 vmalloc_va, u64 device_va,
u64 size)
@@ -201,12 +202,16 @@ static struct hbl_cn_mem_buf *cn_mem_buf_alloc(struct hbl_cn_ctx *ctx, gfp_t gfp
static int cn_mem_alloc(struct hbl_cn_ctx *ctx, struct hbl_cn_mem_data *mem_data)
{
+ struct hbl_cn_device *hdev = ctx->hdev;
struct hbl_cn_mem_buf *buf;
buf = cn_mem_buf_alloc(ctx, GFP_KERNEL, mem_data);
if (!buf)
return -ENOMEM;
+ trace_habanalabs_cn_mem_alloc(hdev->dev, buf->mem_id, buf->handle, (u64)buf->kernel_address,
+ buf->bus_address, buf->device_va, buf->mappable_size);
+
mem_data->handle = buf->handle;
if (mem_data->mem_id == HBL_CN_DRV_MEM_HOST_DMA_COHERENT)
@@ -242,6 +247,10 @@ int hbl_cn_mem_alloc(struct hbl_cn_ctx *ctx, struct hbl_cn_mem_data *mem_data)
static void cn_mem_buf_destroy(struct hbl_cn_mem_buf *buf)
{
+ trace_habanalabs_cn_mem_destroy(buf->ctx->hdev->dev, buf->mem_id, buf->handle,
+ (u64)buf->kernel_address, buf->bus_address, buf->device_va,
+ buf->mappable_size);
+
if (buf->device_va)
hbl_cn_unmap_vmalloc_range(buf->ctx, buf->device_va, buf->mappable_size);
new file mode 100644
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2023 HabanaLabs, Ltd.
+ * Copyright (C) 2023-2024, Intel Corporation.
+ * All Rights Reserved.
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM habanalabs_cn
+
+#if !defined(_TRACE_HABANALABS_CN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HABANALABS_CN_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Event class for CN memory-buffer events: records the device name, memory id,
+ * handle, kernel/bus/device addresses and the buffer size.
+ */
+DECLARE_EVENT_CLASS(habanalabs_cn_mem_template,
+ TP_PROTO(struct device *dev, u32 mem_id, u64 handle, u64 kernel_addr, u64 bus_addr,
+ u64 device_va, size_t size),
+
+ TP_ARGS(dev, mem_id, handle, kernel_addr, bus_addr, device_va, size),
+
+ TP_STRUCT__entry(
+ __string(dname, dev_name(dev))
+ __field(u32, mem_id)
+ __field(u64, handle)
+ __field(u64, kernel_addr)
+ __field(u64, bus_addr)
+ __field(u64, device_va)
+ /* size_t, matching the prototype, so sizes of 4 GiB and above are not truncated */
+ __field(size_t, size)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dname);
+ __entry->mem_id = mem_id;
+ __entry->handle = handle;
+ __entry->kernel_addr = kernel_addr;
+ __entry->bus_addr = bus_addr;
+ __entry->device_va = device_va;
+ __entry->size = size;
+ ),
+
+ TP_printk("%s: mem_id: %#x, handle: %#llx, kernel_addr: %#llx, bus_addr: %#llx, device_va: %#llx, size: %#zx",
+ __get_str(dname),
+ __entry->mem_id,
+ __entry->handle,
+ __entry->kernel_addr,
+ __entry->bus_addr,
+ __entry->device_va,
+ __entry->size)
+);
+
+/* Fired by cn_mem_alloc() once cn_mem_buf_alloc() has returned a buffer. */
+DEFINE_EVENT(habanalabs_cn_mem_template, habanalabs_cn_mem_alloc,
+ TP_PROTO(struct device *dev, u32 mem_id, u64 handle, u64 kernel_addr, u64 bus_addr,
+ u64 device_va, size_t size),
+ TP_ARGS(dev, mem_id, handle, kernel_addr, bus_addr, device_va, size));
+
+/* Fired on entry to cn_mem_buf_destroy(), before any device VA range is unmapped. */
+DEFINE_EVENT(habanalabs_cn_mem_template, habanalabs_cn_mem_destroy,
+ TP_PROTO(struct device *dev, u32 mem_id, u64 handle, u64 kernel_addr, u64 bus_addr,
+ u64 device_va, size_t size),
+ TP_ARGS(dev, mem_id, handle, kernel_addr, bus_addr, device_va, size));
+
+/*
+ * Event class for the DMA alloc/free wrappers: records the device name, CPU
+ * address, DMA address, size and the caller string passed by the wrapper.
+ */
+DECLARE_EVENT_CLASS(habanalabs_cn_dma_alloc_template,
+ TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
+
+ TP_ARGS(dev, cpu_addr, dma_addr, size, caller),
+
+ TP_STRUCT__entry(
+ __string(dname, dev_name(dev))
+ __field(u64, cpu_addr)
+ __field(u64, dma_addr)
+ /* size_t, matching the prototype, so sizes of 4 GiB and above are not truncated */
+ __field(size_t, size)
+ /*
+  * Copy the caller string into the event record. A stored pointer would
+  * only be dereferenced when the buffer is read, cannot be resolved by
+  * user-space readers of the raw buffer, and may dangle by then.
+  */
+ __string(caller, caller)
+ ),
+
+ TP_fast_assign(
+ __assign_str(dname);
+ __entry->cpu_addr = cpu_addr;
+ __entry->dma_addr = dma_addr;
+ __entry->size = size;
+ __assign_str(caller);
+ ),
+
+ TP_printk("%s: cpu_addr: %#llx, dma_addr: %#llx, size: %#zx, caller: %s",
+ __get_str(dname),
+ __entry->cpu_addr,
+ __entry->dma_addr,
+ __entry->size,
+ __get_str(caller)
+ )
+);
+
+/* Fired by __hbl_cn_dma_alloc_coherent() after the ASIC allocation hook returns. */
+DEFINE_EVENT(habanalabs_cn_dma_alloc_template, habanalabs_cn_dma_alloc_coherent,
+ TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size,
+ const char *caller),
+ TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
+
+/* Fired by __hbl_cn_dma_free_coherent() after the ASIC free hook has been called. */
+DEFINE_EVENT(habanalabs_cn_dma_alloc_template, habanalabs_cn_dma_free_coherent,
+ TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size,
+ const char *caller),
+ TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
+
+/* Fired by __hbl_cn_dma_pool_zalloc() after the ASIC pool allocation hook returns. */
+DEFINE_EVENT(habanalabs_cn_dma_alloc_template, habanalabs_cn_dma_pool_zalloc,
+ TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size,
+ const char *caller),
+ TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
+
+/*
+ * Fired by __hbl_cn_dma_pool_free() after the ASIC pool free hook has been
+ * called; that caller passes 0 as the size.
+ */
+DEFINE_EVENT(habanalabs_cn_dma_alloc_template, habanalabs_cn_dma_pool_free,
+ TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size,
+ const char *caller),
+ TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
+
+#endif /* if !defined(_TRACE_HABANALABS_CN_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>