diff mbox series

[v3] drm/msm: Check for powered down HW in the devfreq callbacks

Message ID 20200501194326.14593-1-jcrouse@codeaurora.org (mailing list archive)
State Accepted
Commit eadf79286a4badebc95af7061530bdb50a7e6f38
Headers show
Series [v3] drm/msm: Check for powered down HW in the devfreq callbacks | expand

Commit Message

Jordan Crouse May 1, 2020, 7:43 p.m. UTC
Writing to the devfreq sysfs nodes while the GPU is powered down can
result in a system crash (on a5xx) or a nasty GMU error (on a6xx):

 $ /sys/class/devfreq/5000000.gpu# echo 500000000 > min_freq
  [  104.841625] platform 506a000.gmu: [drm:a6xx_gmu_set_oob]
	*ERROR* Timeout waiting for GMU OOB set GPU_DCVS: 0x0

Despite the fact that we carefully try to suspend the devfreq device when
the hardware is powered down there are lots of holes in the governors that
don't check for the suspend state and blindly call into the devfreq
callbacks that end up triggering hardware reads in the GPU driver.

Call pm_runtime_get_if_in_use() in the gpu_busy() and gpu_set_freq()
callbacks to skip the hardware access if it isn't active.

v3: Only check pm_runtime_get_if_in_use() for == 0 per Eric Anholt
v2: Use pm_runtime_get_if_in_use() per Eric Anholt

Cc: stable@vger.kernel.org
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---

 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 6 ++++++
 drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 8 ++++++++
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 7 +++++++
 3 files changed, 21 insertions(+)

Comments

Sasha Levin May 6, 2020, 11:42 p.m. UTC | #1
Hi

[This is an automated email]

This commit has been processed because it contains a -stable tag.
The stable tag indicates that it's relevant for the following trees: all

The bot has tested the following trees: v5.6.8, v5.4.36, v4.19.119, v4.14.177, v4.9.220, v4.4.220.

v5.6.8: Build OK!
v5.4.36: Build OK!
v4.19.119: Failed to apply! Possible dependencies:
    16f37102181e ("drm/msm: a6xx: Fix improper u64 division")
    a2c3c0a54d4c ("drm/msm/a6xx: Add devfreq support for a6xx")
    de0a3d094de0 ("drm/msm: re-factor devfreq code")
    f926a2e1718e ("drm/msm: a5xx: Fix improper u64 division")
    fcf9d0b7d2f5 ("drm/msm/a6xx: Add support for an interconnect path")

v4.14.177: Failed to apply! Possible dependencies:
    4c7085a5d581 ("drm/msm: Shadow current pointer in the ring until command is complete")
    b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets")
    c09513cfebd8 ("drm/msm/adreno: a5xx: Explicitly program the CP0 performance counter")
    cd414f3d9316 ("drm/msm: Move memptrs to msm_gpu")
    eec874ce5ff1 ("drm/msm/adreno: load gpu at probe/bind time")
    f7de15450e90 ("drm/msm: Add per-instance submit queues")
    f91c14ab448a ("drm/msm: Add devfreq support for the GPU")
    f97decac5f4c ("drm/msm: Support multiple ringbuffers")

v4.9.220: Failed to apply! Possible dependencies:
    1cec20f0ea0e ("dma-buf: Restart reservation_object_wait_timeout_rcu() after writes")
    667ce33e57d0 ("drm/msm: support multiple address spaces")
    78010cd9736e ("dma-buf/fence: add an lockdep_assert_held()")
    89d777a57245 ("drm/msm: Remove 'src_clk' from adreno configuration")
    b5f103ab98c7 ("drm/msm: gpu: Add A5XX target support")
    c09513cfebd8 ("drm/msm/adreno: a5xx: Explicitly program the CP0 performance counter")
    f54d1867005c ("dma-buf: Rename struct fence to dma_fence")
    f91c14ab448a ("drm/msm: Add devfreq support for the GPU")
    fb0399819239 ("drm/msm: Add adreno_gpu_write64()")
    fedf54132d24 ("dma-buf: Restart reservation_object_get_fences_rcu() after writes")

v4.4.220: Failed to apply! Possible dependencies:
    1aaa57f5d4b6 ("drm/msm: Free fb helper resources in msm_unload")
    1dd0a0b18697 ("drm/msm/mdp5: Create a separate MDP5 device")
    2b669875332f ("drm/msm: Drop load/unload drm_driver ops")
    357ff00b08d6 ("drm/msm/adreno: support for adreno 430.")
    667ce33e57d0 ("drm/msm: support multiple address spaces")
    7d0c5ee9f077 ("drm/msm/adreno: get CP_RPTR from register instead of shadow memory")
    8208ed931eea ("drm/msm: Centralize connector registration/unregistration")
    89d777a57245 ("drm/msm: Remove 'src_clk' from adreno configuration")
    990a40079a55 ("drm/msm/mdp5: Add MDSS top level driver")
    a2b3a5571f38 ("drm/msm: Get irq number within kms driver itself")
    a3ccfb9feb46 ("drm/msm: Rename async to nonblock.")
    a5725ab0497a ("drm/msm/adreno: move function declarations to header file")
    b5f103ab98c7 ("drm/msm: gpu: Add A5XX target support")
    ba00c3f2f0c8 ("drm/msm: remove fence_cbs")
    c09513cfebd8 ("drm/msm/adreno: a5xx: Explicitly program the CP0 performance counter")
    ca762a8ae7f4 ("drm/msm: introduce msm_fence_context")
    cd79272696ef ("drm/msm: Call pm_runtime_enable/disable for newly created devices")
    edcd60ce243d ("drm/msm: move debugfs code to it's own file")
    f759020530e8 ("drm/msm/mdp: Detach iommu in mdp4_destroy")
    f91c14ab448a ("drm/msm: Add devfreq support for the GPU")
    fb0399819239 ("drm/msm: Add adreno_gpu_write64()")
    fde5de6cb461 ("drm/msm: move fence code to it's own file")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 724024a2243a..662d02289533 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1404,6 +1404,10 @@  static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
 {
 	u64 busy_cycles, busy_time;
 
+	/* Only read the gpu busy if the hardware is already active */
+	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
+		return 0;
+
 	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
 			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
 
@@ -1412,6 +1416,8 @@  static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
 
 	gpu->devfreq.busy_cycles = busy_cycles;
 
+	pm_runtime_put(&gpu->pdev->dev);
+
 	if (WARN_ON(busy_time > ~0LU))
 		return ~0LU;
 
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index c4e71abbdd53..34607a98cc7c 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -108,6 +108,13 @@  static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
 	struct msm_gpu *gpu = &adreno_gpu->base;
 	int ret;
 
+	/*
+	 * This can get called from devfreq while the hardware is idle. Don't
+	 * bring up the power if it isn't already active
+	 */
+	if (pm_runtime_get_if_in_use(gmu->dev) == 0)
+		return;
+
 	gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);
 
 	gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
@@ -134,6 +141,7 @@  static void __a6xx_gmu_set_freq(struct a6xx_gmu *gmu, int index)
 	 * for now leave it at max so that the performance is nominal.
 	 */
 	icc_set_bw(gpu->icc_path, 0, MBps_to_icc(7216));
+	pm_runtime_put(gmu->dev);
 }
 
 void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq)
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 68af24150de5..2c09d2c21773 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -810,6 +810,11 @@  static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 	u64 busy_cycles, busy_time;
 
+
+	/* Only read the gpu busy if the hardware is already active */
+	if (pm_runtime_get_if_in_use(a6xx_gpu->gmu.dev) == 0)
+		return 0;
+
 	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
 			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
 			REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);
@@ -819,6 +824,8 @@  static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
 
 	gpu->devfreq.busy_cycles = busy_cycles;
 
+	pm_runtime_put(a6xx_gpu->gmu.dev);
+
 	if (WARN_ON(busy_time > ~0LU))
 		return ~0LU;