Message ID | 20220302225551.v1.2.Icda301aa85f1e4367601fa9b830b3365d377e669@changeid (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support for GMU coredump and some related improvements | expand |
Hi Akhil, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on drm/drm-next] [also build test WARNING on drm-intel/for-linux-next drm-tip/drm-tip drm-exynos/exynos-drm-next tegra-drm/drm/tegra/for-next v5.17-rc7 next-20220308] [cannot apply to airlied/drm-next] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Akhil-P-Oommen/Support-for-GMU-coredump-and-some-related-improvements/20220303-013028 base: git://anongit.freedesktop.org/drm/drm drm-next config: riscv-randconfig-r042-20220307 (https://download.01.org/0day-ci/archive/20220308/202203082018.IcI00Nvs-lkp@intel.com/config) compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project d271fc04d5b97b12e6b797c6067d3c96a8d7470e) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install riscv cross compiling tool for clang build # apt-get install binutils-riscv64-linux-gnu # https://github.com/0day-ci/linux/commit/23953efc645803299a93f178e9a32f2ae97dae39 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Akhil-P-Oommen/Support-for-GMU-coredump-and-some-related-improvements/20220303-013028 git checkout 23953efc645803299a93f178e9a32f2ae97dae39 # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash drivers/gpu/drm/msm/ If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All warnings (new ones prefixed by >>): >> drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:967:6: warning: no previous prototype for function 'a6xx_get_gmu_state' [-Wmissing-prototypes] void a6xx_get_gmu_state(struct msm_gpu *gpu, 
struct a6xx_gpu_state *a6xx_state) ^ drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c:967:1: note: declare 'static' if the function is not intended to be used outside of this translation unit void a6xx_get_gmu_state(struct msm_gpu *gpu, struct a6xx_gpu_state *a6xx_state) ^ static 1 warning generated. vim +/a6xx_get_gmu_state +967 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c 966 > 967 void a6xx_get_gmu_state(struct msm_gpu *gpu, struct a6xx_gpu_state *a6xx_state) 968 { 969 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); 970 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); 971 struct a6xx_gmu *gmu = &a6xx_gpu->gmu; 972 973 if (gmu->hung) 974 a6xx_gmu_send_nmi(gmu); 975 976 a6xx_get_gmu_registers(gpu, a6xx_state); 977 } 978 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 3e325e2..f208a81 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -14,6 +14,37 @@ #include "msm_gpu_trace.h" #include "msm_mmu.h" +void a6xx_gmu_send_nmi(struct a6xx_gmu *gmu) +{ + struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; + struct msm_gpu *gpu = &adreno_gpu->base; + u32 val; + + if (a6xx_gmu_gx_is_on(gmu) && a6xx_is_smmu_stalled(gpu)) { + DRM_DEV_ERROR(gmu->dev, + "Skipping GMU NMI since SMMU is stalled\n"); + } + + /* Don't retrigger NMI if gmu reset is already active */ + val = gmu_read(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT); + if (val & 0xE00) + return; + + /* Mask all interrupts from GMU first */ + gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, 0xFFFFFFFF); + + /* Trigger NMI to make gmu save its internal state to ddr */ + val = gmu_read(gmu, REG_A6XX_GMU_CM3_CFG); + gmu_write(gmu, REG_A6XX_GMU_CM3_CFG, val | BIT(9)); + + /* Barrier to ensure write is posted before we proceed */ + wmb(); + + /* Small delay to ensure state copy to ddr is complete at GMU */ + udelay(200); +} + static void a6xx_gmu_fault(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); @@ -790,6 +821,12 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state) gmu_write(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT, 0); gmu_write(gmu, REG_A6XX_GMU_CM3_BOOT_CONFIG, 0x02); + /* + * Make sure that the NMI bit is cleared by configuring the reset value + * here + */ + gmu_write(gmu, REG_A6XX_GMU_CM3_CFG, 0x4052); + /* Write the iova of the HFI table */ gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_ADDR, gmu->hfi.iova); gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_INFO, 1); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index 84bd516..4228ec1 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ 
b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -186,5 +186,6 @@ int a6xx_hfi_set_freq(struct a6xx_gmu *gmu, int index); bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu); bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu); +void a6xx_gmu_send_nmi(struct a6xx_gmu *gmu); #endif diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index 7de9d2f..09b2ff0 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -964,6 +964,18 @@ static void a6xx_get_indexed_registers(struct msm_gpu *gpu, a6xx_state->nr_indexed_regs = count; } +void a6xx_get_gmu_state(struct msm_gpu *gpu, struct a6xx_gpu_state *a6xx_state) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; + + if (gmu->hung) + a6xx_gmu_send_nmi(gmu); + + a6xx_get_gmu_registers(gpu, a6xx_state); +} + struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) { struct a6xx_crashdumper _dumper = { 0 }, *dumper = NULL; @@ -980,7 +992,7 @@ struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu) /* Get the generic state from the adreno core */ adreno_gpu_state_get(gpu, &a6xx_state->base); - a6xx_get_gmu_registers(gpu, a6xx_state); + a6xx_get_gmu_state(gpu, a6xx_state); a6xx_state->gmu_log = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.log); a6xx_state->gmu_hfi = a6xx_snapshot_gmu_bo(a6xx_state, &a6xx_gpu->gmu.hfi);
While capturing the GMU state, first send an NMI to the GMU if it is hung. This helps move the GMU to a safe state. Signed-off-by: Akhil P Oommen <quic_akhilpo@quicinc.com> --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 37 +++++++++++++++++++++++++++++ drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 1 + drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 14 ++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-)