--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -491,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
- return amdgpu_sync_fence(sync, vm->last_update);
+ return amdgpu_sync_fence(sync, vm->last_update, __func__);
}

static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -1251,7 +1251,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
- amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ amdgpu_sync_fence(sync, bo_va->last_pt_update, __func__);
}

static int update_gpuvm_pte(struct kgd_mem *mem,
@@ -1273,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
- return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+ return amdgpu_sync_fence(sync, bo_va->last_pt_update, __func__);
}

static int map_bo_to_gpuvm(struct kgd_mem *mem,
@@ -2910,7 +2910,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
DMA_RESV_USAGE_KERNEL, fence) {
- ret = amdgpu_sync_fence(&sync_obj, fence);
+ ret = amdgpu_sync_fence(&sync_obj, fence, __func__);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -423,7 +423,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
- r = amdgpu_sync_fence(&p->sync, fence);
+ r = amdgpu_sync_fence(&p->sync, fence, __func__);
dma_fence_put(fence);
if (r)
return r;
@@ -445,7 +445,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
return r;
}
- r = amdgpu_sync_fence(&p->sync, fence);
+ r = amdgpu_sync_fence(&p->sync, fence, __func__);
dma_fence_put(fence);
return r;
}
@@ -1101,7 +1101,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update, __func__);
if (r)
return r;
@@ -1112,7 +1112,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, __func__);
if (r)
return r;
}
@@ -1131,7 +1131,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, __func__);
if (r)
return r;
}
@@ -1144,7 +1144,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->sync, vm->last_update);
+ r = amdgpu_sync_fence(&p->sync, vm->last_update, __func__);
if (r)
return r;
@@ -1225,7 +1225,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
continue;
}
- r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence, __func__);
dma_fence_put(fence);
if (r)
return r;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -309,7 +309,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished);
+ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished, __func__);
if (r)
return r;
@@ -369,8 +369,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
/* Good, we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(&(*id)->active,
- &job->base.s_fence->finished);
+ r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished, __func__);
if (r)
return r;
@@ -421,8 +420,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
id = idle;
/* Remember this submission as user of the VMID */
- r = amdgpu_sync_fence(&id->active,
- &job->base.s_fence->finished);
+ r = amdgpu_sync_fence(&id->active, &job->base.s_fence->finished, __func__);
if (r)
goto error;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1219,14 +1219,14 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
DRM_ERROR("failed to do vm_bo_update on meta data\n");
goto error_del_bo_va;
}
- amdgpu_sync_fence(&sync, bo_va->last_pt_update);
+ amdgpu_sync_fence(&sync, bo_va->last_pt_update, __func__);
r = amdgpu_vm_update_pdes(adev, vm, false);
if (r) {
DRM_ERROR("failed to update pdes on meta data\n");
goto error_del_bo_va;
}
- amdgpu_sync_fence(&sync, vm->last_update);
+ amdgpu_sync_fence(&sync, vm->last_update, __func__);
amdgpu_sync_wait(&sync, false);
drm_exec_fini(&exec);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -30,6 +30,7 @@
*/
#include <linux/dma-fence-chain.h>
+#include <trace/events/dma_fence.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -149,10 +150,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
*
* @sync: sync object to add fence to
* @f: fence to sync to
+ * @reason: why we sync to this fence
*
* Add the fence to the sync object.
*/
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ const char *reason)
{
struct amdgpu_sync_entry *e;
@@ -166,6 +169,8 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
if (!e)
return -ENOMEM;
+ trace_dma_fence_sync_to(f, reason);
+
hash_add(sync->fences, &e->node, f->context);
e->fence = dma_fence_get(f);
return 0;
@@ -249,7 +254,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_fence *tmp = dma_fence_chain_contained(f);
if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
- r = amdgpu_sync_fence(sync, f);
+ r = amdgpu_sync_fence(sync, f, __func__);
dma_fence_put(f);
if (r)
return r;
@@ -358,7 +363,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
- r = amdgpu_sync_fence(clone, f);
+ r = amdgpu_sync_fence(clone, f, __func__);
if (r)
return r;
} else {
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -47,7 +47,8 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+ const char *reason);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -117,13 +117,13 @@ static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
goto error_del_bo_va;
- amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update);
+ amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update, __func__);
r = amdgpu_vm_update_pdes(adev, vm, false);
if (r)
goto error_del_bo_va;
- amdgpu_sync_fence(&sync, vm->last_update);
+ amdgpu_sync_fence(&sync, vm->last_update, __func__);
amdgpu_sync_wait(&sync, false);
	drm_exec_fini(&exec);

This makes it possible to understand the dependencies between jobs.

Possible usage of this trace:
* stuttering issues like Mesa !9189
* incorrect synchronization: I don't have a link for this one, but
  having these events was very useful to debug a virtio-gpu /
  native-context / radeonsi sync issue

I have prototype code using this in UMR, as can be seen here:
https://gitlab.freedesktop.org/tomstdenis/umr/-/merge_requests/37

The 'reason' param currently uses __func__, but I didn't add a macro for
this because it'd be interesting to use more descriptive names for each
use of amdgpu_sync_fence.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           | 14 +++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c          |  8 +++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c          |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c         | 11 ++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h         |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c     |  4 ++--
 7 files changed, 28 insertions(+), 24 deletions(-)
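A note on the event itself: the definition of trace_dma_fence_sync_to()
is not shown in this excerpt; it is assumed to come from a separate
dma-fence patch adding the event to include/trace/events/dma_fence.h.
A minimal sketch of what it could look like, modeled on the existing
dma_fence event class in that header (not the authoritative definition
from the series):

/* Sketch only: mirrors the existing dma_fence events, plus a reason string. */
TRACE_EVENT(dma_fence_sync_to,

	TP_PROTO(struct dma_fence *fence, const char *reason),

	TP_ARGS(fence, reason),

	TP_STRUCT__entry(
		__string(driver, fence->ops->get_driver_name(fence))
		__string(timeline, fence->ops->get_timeline_name(fence))
		__field(unsigned int, context)
		__field(unsigned int, seqno)
		__string(reason, reason)
	),

	TP_fast_assign(
		__assign_str(driver, fence->ops->get_driver_name(fence));
		__assign_str(timeline, fence->ops->get_timeline_name(fence));
		__entry->context = fence->context;
		__entry->seqno = fence->seqno;
		__assign_str(reason, reason);
	),

	TP_printk("driver=%s timeline=%s context=%u seqno=%u reason=%s",
		  __get_str(driver), __get_str(timeline), __entry->context,
		  __entry->seqno, __get_str(reason))
);

With an event along these lines enabled (e.g. via
/sys/kernel/tracing/events/dma_fence/dma_fence_sync_to/enable), each
record ties the synced-to fence (driver, timeline, context, seqno) to
the reason string passed by the caller, which is the data the UMR
prototype linked above visualizes.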
This makes it possible to understand the dependencies between jobs. Possible usage of this trace: * stuttering issues like Mesa !9189 * incorrect synchronization: I don't have a link for this one, but having these events was very useful to debug a virtio-gpu / native-context / radeonsi sync issue I have prototype code using this in UMR, as can be see here: https://gitlab.freedesktop.org/tomstdenis/umr/-/merge_requests/37 The 'reason' param currently uses __func__ but I didn't add a macro for this because it'd be interesting to use more descriptive names for each use of amdgpu_fence_sync. Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 8 +++----- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 11 ++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 4 ++-- 7 files changed, 28 insertions(+), 24 deletions(-)