diff mbox series

[3/3] drm/etnaviv: take current primitive into account when checking for hung GPU

Message ID 20240628104745.2602036-3-l.stach@pengutronix.de (mailing list archive)
State New, archived
Headers show
Series [1/3] drm/etnaviv: move debug register en-/disable into own function | expand

Commit Message

Lucas Stach June 28, 2024, 10:47 a.m. UTC
Large draws can make the GPU appear to be stuck to the current hangcheck
logic, as the FE address will not move until the draw is finished. However,
the FE has a debug register that records the current primitive ID within
a draw. By reading this debug register, we can extend the timeout for as
long as the draw is making progress.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h   |  1 +
 drivers/gpu/drm/etnaviv/etnaviv_sched.c | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

Comments

Philipp Zabel June 28, 2024, 11:08 a.m. UTC | #1
On Fr, 2024-06-28 at 12:47 +0200, Lucas Stach wrote:
> Large draws can make the GPU appear to be stuck to the current hangcheck
> logic as the FE address will not move until the draw is finished. However,
> the FE has a debug register, which records the current primitive ID within
> a draw. Using this debug register we can extend the timeout as long as the
> draw progresses.
> 
> Signed-off-by: Lucas Stach <l.stach@pengutronix.de>

Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>

regards
Philipp
diff mbox series

Patch

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 07a6c66e0005..079e03f511ec 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -145,6 +145,7 @@  struct etnaviv_gpu {
 
 	/* hang detection */
 	u32 hangcheck_dma_addr;
+	u32 hangcheck_primid;
 	u32 hangcheck_fence;
 
 	void __iomem *mmio;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 62dcfdc7894d..077645df34ac 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -11,6 +11,7 @@ 
 #include "etnaviv_gpu.h"
 #include "etnaviv_sched.h"
 #include "state.xml.h"
+#include "state_hi.xml.h"
 
 static int etnaviv_job_hang_limit = 0;
 module_param_named(job_hang_limit, etnaviv_job_hang_limit, int , 0444);
@@ -35,7 +36,7 @@  static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
 {
 	struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
 	struct etnaviv_gpu *gpu = submit->gpu;
-	u32 dma_addr;
+	u32 dma_addr, primid = 0;
 	int change;
 
 	/*
@@ -52,10 +53,21 @@  static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
 	 */
 	dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
 	change = dma_addr - gpu->hangcheck_dma_addr;
+	if (submit->exec_state == ETNA_PIPE_3D) {
+		etnaviv_gpu_enable_debug_regs(gpu);
+		gpu_write(gpu, VIVS_MC_PROFILE_CONFIG0,
+			  VIVS_MC_PROFILE_CONFIG0_FE_CURRENT_PRIM <<
+			  VIVS_MC_PROFILE_CONFIG0_FE__SHIFT);
+		primid = gpu_read(gpu, VIVS_MC_PROFILE_FE_READ);
+		etnaviv_gpu_disable_debug_regs(gpu);
+	}
 	if (gpu->state == ETNA_GPU_STATE_RUNNING &&
 	    (gpu->completed_fence != gpu->hangcheck_fence ||
-	     change < 0 || change > 16)) {
+	     change < 0 || change > 16 ||
+	     (submit->exec_state == ETNA_PIPE_3D &&
+	      gpu->hangcheck_primid != primid))) {
 		gpu->hangcheck_dma_addr = dma_addr;
+		gpu->hangcheck_primid = primid;
 		gpu->hangcheck_fence = gpu->completed_fence;
 		goto out_no_timeout;
 	}