Message ID | 20211109181117.591148-6-robdclark@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/msm: Cleanup and drm/sched tdr prep | expand |
On 11/9/2021 11:41 PM, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
>
> Add a debugfs interface to ignore hw error irqs, in order to force
> fallback to sw hangcheck mechanism. Because the hw error detection is
> pretty good on newer gens, we need this for igt tests to test the sw
> hang detection.
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
>  drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 6 ++++++
>  drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++++
>  drivers/gpu/drm/msm/msm_debugfs.c | 3 +++
>  drivers/gpu/drm/msm/msm_drv.h | 9 +++++++++
>  4 files changed, 22 insertions(+)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> index 6163990a4d09..ec8e043c9d38 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> @@ -1252,6 +1252,7 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
>
>  static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
>  {
> +	struct msm_drm_private *priv = gpu->dev->dev_private;
>  	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
>
>  	/*
> @@ -1261,6 +1262,11 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
>  	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
>  		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
>
> +	if (priv->disable_err_irq) {
> +		status &= A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS |
> +			A5XX_RBBM_INT_0_MASK_CP_SW;
> +	}
> +
>  	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
>  	if (status & RBBM_ERROR_MASK)
>  		a5xx_rbbm_err_irq(gpu, status);
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 3d2da81cb2c9..8a2af3a27e33 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -1373,10 +1373,14 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
>
>  static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
>  {
> +	struct msm_drm_private *priv = gpu->dev->dev_private;
>  	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
>
>  	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
>
> +	if (priv->disable_err_irq)
> +		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
> +
>  	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
>  		a6xx_fault_detect_irq(gpu);
>
> diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
> index 6a99e8b5d25d..956b1efc3721 100644
> --- a/drivers/gpu/drm/msm/msm_debugfs.c
> +++ b/drivers/gpu/drm/msm/msm_debugfs.c
> @@ -242,6 +242,9 @@ void msm_debugfs_init(struct drm_minor *minor)
>  	debugfs_create_u32("hangcheck_period_ms", 0600, minor->debugfs_root,
>  		&priv->hangcheck_period);
>
> +	debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root,
> +		&priv->disable_err_irq);
> +
>  	debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root,
>  		dev, &shrink_fops);
>
> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> index 2943c21d9aac..a8da7a7efb84 100644
> --- a/drivers/gpu/drm/msm/msm_drv.h
> +++ b/drivers/gpu/drm/msm/msm_drv.h
> @@ -246,6 +246,15 @@ struct msm_drm_private {
>
>  	/* For hang detection, in ms */
>  	unsigned int hangcheck_period;
> +
> +	/**
> +	 * disable_err_irq:
> +	 *
> +	 * Disable handling of GPU hw error interrupts, to force fallback to
> +	 * sw hangcheck timer. Written (via debugfs) by igt tests to test
> +	 * the sw hangcheck mechanism.
> +	 */
> +	bool disable_err_irq;
>  };
>
>  struct msm_format {
>

Reviewed-by: Akhil P Oommen <akhilpo@codeaurora.org>

-Akhil.
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 6163990a4d09..ec8e043c9d38 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1252,6 +1252,7 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
 
 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
 {
+	struct msm_drm_private *priv = gpu->dev->dev_private;
 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
 
 	/*
@@ -1261,6 +1262,11 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 
+	if (priv->disable_err_irq) {
+		status &= A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS |
+			A5XX_RBBM_INT_0_MASK_CP_SW;
+	}
+
 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
 	if (status & RBBM_ERROR_MASK)
 		a5xx_rbbm_err_irq(gpu, status);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 3d2da81cb2c9..8a2af3a27e33 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1373,10 +1373,14 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
 
 static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
 {
+	struct msm_drm_private *priv = gpu->dev->dev_private;
 	u32 status = gpu_read(gpu, REG_A6XX_RBBM_INT_0_STATUS);
 
 	gpu_write(gpu, REG_A6XX_RBBM_INT_CLEAR_CMD, status);
 
+	if (priv->disable_err_irq)
+		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;
+
 	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
 		a6xx_fault_detect_irq(gpu);
 
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index 6a99e8b5d25d..956b1efc3721 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -242,6 +242,9 @@ void msm_debugfs_init(struct drm_minor *minor)
 	debugfs_create_u32("hangcheck_period_ms", 0600, minor->debugfs_root,
 		&priv->hangcheck_period);
 
+	debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root,
+		&priv->disable_err_irq);
+
 	debugfs_create_file("shrink", S_IRWXU, minor->debugfs_root,
 		dev, &shrink_fops);
 
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 2943c21d9aac..a8da7a7efb84 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -246,6 +246,15 @@ struct msm_drm_private {
 
 	/* For hang detection, in ms */
 	unsigned int hangcheck_period;
+
+	/**
+	 * disable_err_irq:
+	 *
+	 * Disable handling of GPU hw error interrupts, to force fallback to
+	 * sw hangcheck timer. Written (via debugfs) by igt tests to test
+	 * the sw hangcheck mechanism.
+	 */
+	bool disable_err_irq;
 };
 
 struct msm_format {