diff mbox

[13/13] drm/msm/gpu: Add the buffer objects from the submit to the crash dump

Message ID 20180629165641.1348-14-jcrouse@codeaurora.org (mailing list archive)
State Not Applicable, archived
Delegated to: Andy Gross
Headers show

Commit Message

Jordan Crouse June 29, 2018, 4:56 p.m. UTC
For hangs, copy out the contents of the buffer objects attached to the
guilty submission and print them in the crash dump report.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 Documentation/gpu/drm-msm-crash-dump.txt |  7 +++
 drivers/gpu/drm/msm/adreno/adreno_gpu.c  | 58 ++++++++++++++++++++----
 drivers/gpu/drm/msm/msm_gpu.c            | 48 ++++++++++++++++++--
 drivers/gpu/drm/msm/msm_gpu.h            |  9 ++++
 4 files changed, 109 insertions(+), 13 deletions(-)

Comments

kernel test robot June 29, 2018, 7:31 p.m. UTC | #1
Hi Jordan,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on robclark/msm-next]
[also build test WARNING on v4.18-rc2 next-20180629]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Jordan-Crouse/drm-msm-Capture-and-dump-the-GPU-crash-state/20180630-015229
base:   git://people.freedesktop.org/~robclark/linux msm-next
config: arm-multi_v7_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        GCC_VERSION=7.2.0 make.cross ARCH=arm 

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/msm/adreno/adreno_gpu.c: In function 'adreno_show':
>> drivers/gpu/drm/msm/adreno/adreno_gpu.c:537:31: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'size_t {aka unsigned int}' [-Wformat=]
       drm_printf(p, "    size: %ld\n", state->bos[i].size);
                                ~~^     ~~~~~~~~~~~~~~~~~~
                                %d

vim +537 drivers/gpu/drm/msm/adreno/adreno_gpu.c

   500	
   501	void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
   502			struct drm_printer *p)
   503	{
   504		struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
   505		int i;
   506	
   507		if (IS_ERR_OR_NULL(state))
   508			return;
   509	
   510		drm_printf(p, "revision: %d (%d.%d.%d.%d)\n",
   511				adreno_gpu->info->revn, adreno_gpu->rev.core,
   512				adreno_gpu->rev.major, adreno_gpu->rev.minor,
   513				adreno_gpu->rev.patchid);
   514	
   515		drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status);
   516	
   517		drm_puts(p, "ringbuffer:\n");
   518	
   519		for (i = 0; i < gpu->nr_rings; i++) {
   520			drm_printf(p, "  - id: %d\n", i);
   521			drm_printf(p, "    last-fence: %d\n", state->ring[i].seqno);
   522			drm_printf(p, "    retired-fence: %d\n", state->ring[i].fence);
   523			drm_printf(p, "    rptr: %d\n", state->ring[i].rptr);
   524			drm_printf(p, "    wptr: %d\n", state->ring[i].wptr);
   525			drm_printf(p, "    size: %d\n", MSM_GPU_RINGBUFFER_SZ);
   526	
   527			adreno_show_object(p, state->ring[i].data,
   528				state->ring[i].data_size);
   529		}
   530	
   531		if (state->bos) {
   532			drm_puts(p, "bos:\n");
   533	
   534			for (i = 0; i < state->nr_bos; i++) {
   535				drm_printf(p, "  - iova: 0x%016llx\n",
   536					state->bos[i].iova);
 > 537				drm_printf(p, "    size: %ld\n", state->bos[i].size);
   538	
   539				adreno_show_object(p, state->bos[i].data,
   540					state->bos[i].size);
   541			}
   542		}
   543	
   544		drm_puts(p, "registers:\n");
   545	
   546		for (i = 0; i < state->nr_registers; i++) {
   547			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
   548				state->registers[i * 2] << 2,
   549				state->registers[(i * 2) + 1]);
   550		}
   551	}
   552	#endif
   553	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/Documentation/gpu/drm-msm-crash-dump.txt b/Documentation/gpu/drm-msm-crash-dump.txt
index 7083075c6f87..b9804dd112db 100644
--- a/Documentation/gpu/drm-msm-crash-dump.txt
+++ b/Documentation/gpu/drm-msm-crash-dump.txt
@@ -40,6 +40,13 @@  bos:		# List of buffers from the hanging submission (if known)
    data:		# [ascii85] The contents of the ring encoded as ascii85.
 			# Only the unused portions of the ring will be printed
 			# (up to a maximum of 'size' bytes)
+bos:		# List of buffers from the hanging submission (if known)
+  -iova:		# [hex] GPU address of the buffer
+   size:		# [decimal] Size of the buffer (in bytes)
+   data:		# [ascii85] The contents of the buffer encoded as
+			# ascii85. Only the contents of buffers marked as
+			# readable are dumped. Trailing zeros at the end of the
+			# buffer won't be dumped.
 registers:	# Sets of register values. This section can be used multiple
 		# times for different ranges of registers. Each register will be
 		# on its own line.
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index be81fe1f6a35..ce8b7af0a77d 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -438,6 +438,10 @@  void adreno_gpu_state_destroy(struct msm_gpu_state *state)
 	for (i = 0; i < ARRAY_SIZE(state->ring); i++)
 		kfree(state->ring[i].data);
 
+	for (i = 0; state->bos && i < state->nr_bos; i++)
+		kvfree(state->bos[i].data);
+
+	kfree(state->bos);
 	kfree(state->comm);
 	kfree(state->cmd);
 	kfree(state->registers);
@@ -461,6 +465,39 @@  int adreno_gpu_state_put(struct msm_gpu_state *state)
 }
 
 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
+
+/* Print the leading non-zero part of a buffer as an ascii85 "data:" entry */
+static void adreno_show_object(struct drm_printer *p, u32 *ptr, int len)
+{
+	char out[ASCII85_BUFSZ];
+	long l, datalen, i;
+
+	if (!ptr || !len)
+		return;
+
+	/*
+	 * Only dump up to and including the last non-zero dword - rarely will
+	 * any data completely fill the entire allocated size of the buffer
+	 */
+	for (datalen = 0, i = 0; i < len >> 2; i++) {
+		if (ptr[i])
+			datalen = (i + 1) << 2;
+	}
+
+	/* Skip printing the object if it is empty */
+	if (datalen == 0)
+		return;
+
+	l = ascii85_encode_len(datalen);
+
+	drm_puts(p, "    data: !!ascii85 |\n");
+	drm_puts(p, "     ");
+	for (i = 0; i < l; i++)
+		drm_puts(p, ascii85_encode(ptr[i], out));
+
+	drm_puts(p, "\n");
+}
+
 void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
 		struct drm_printer *p)
 {
@@ -487,19 +524,20 @@  void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
 		drm_printf(p, "    wptr: %d\n", state->ring[i].wptr);
 		drm_printf(p, "    size: %d\n", MSM_GPU_RINGBUFFER_SZ);
 
-		if (state->ring[i].data && state->ring[i].data_size) {
-			u32 *ptr = (u32 *) state->ring[i].data;
-			char out[ASCII85_BUFSZ];
-			long len = ascii85_encode_len(state->ring[i].data_size);
-			int j;
+		adreno_show_object(p, state->ring[i].data,
+			state->ring[i].data_size);
+	}
 
-			drm_printf(p, "    data: !!ascii85 |\n");
-			drm_printf(p, "     ");
+	if (state->bos) {
+		drm_puts(p, "bos:\n");
 
-			for (j = 0; j < len; j++)
-				drm_printf(p, ascii85_encode(ptr[j], out));
+		for (i = 0; i < state->nr_bos; i++) {
+			drm_printf(p, "  - iova: 0x%016llx\n",
+				state->bos[i].iova);
+			drm_printf(p, "    size: %zu\n", state->bos[i].size);
 
-			drm_printf(p, "\n");
+			adreno_show_object(p, state->bos[i].data,
+				state->bos[i].size);
 		}
 	}
 
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 1945736fc448..03e62f6ea2b6 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -318,8 +318,39 @@  static void msm_gpu_devcoredump_free(void *data)
 	msm_gpu_crashstate_put(gpu);
 }
 
-static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, char *comm,
-		char *cmd)
+/* Snapshot one buffer object of the guilty submit into the next free slot */
+static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state,
+		struct msm_gem_object *obj, u64 iova, u32 flags)
+{
+	struct msm_gpu_state_bo *state_bo = &state->bos[state->nr_bos];
+
+	/* Address and size are recorded for every BO, even without data */
+	state_bo->size = obj->base.size;
+	state_bo->iova = iova;
+
+	/* Only store the data for buffer objects marked for read */
+	if ((flags & MSM_SUBMIT_BO_READ)) {
+		void *ptr;
+
+		state_bo->data = kvmalloc(obj->base.size, GFP_KERNEL);
+		if (!state_bo->data)
+			goto out;
+
+		ptr = msm_gem_get_vaddr_active(&obj->base);
+		if (IS_ERR(ptr)) {
+			kvfree(state_bo->data);
+			state_bo->data = NULL; /* no stale pointer to free later */
+			goto out;
+		}
+		memcpy(state_bo->data, ptr, obj->base.size);
+		msm_gem_put_vaddr(&obj->base);
+	}
+out:
+	state->nr_bos++;
+}
+
+static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
+		struct msm_gem_submit *submit, char *comm, char *cmd)
 {
 	struct msm_gpu_state *state;
 
@@ -335,6 +366,17 @@  static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, char *comm,
 	state->comm = kstrdup(comm, GFP_KERNEL);
 	state->cmd = kstrdup(cmd, GFP_KERNEL);
 
+	if (submit) {
+		int i;
+
+		state->bos = kcalloc(submit->nr_bos,
+			sizeof(struct msm_gpu_state_bo), GFP_KERNEL);
+
+		for (i = 0; state->bos && i < submit->nr_bos; i++)
+			msm_gpu_crashstate_get_bo(state, submit->bos[i].obj,
+				submit->bos[i].iova, submit->bos[i].flags);
+	}
+
 	/* Set the active crash state to be dumped on failure */
 	gpu->crashstate = state;
 
@@ -434,7 +476,7 @@  static void recover_worker(struct work_struct *work)
 
 	/* Record the crash state */
 	pm_runtime_get_sync(&gpu->pdev->dev);
-	msm_gpu_crashstate_capture(gpu, comm, cmd);
+	msm_gpu_crashstate_capture(gpu, submit, comm, cmd);
 	pm_runtime_put_sync(&gpu->pdev->dev);
 
 	kfree(cmd);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 48f7b21f1cae..8242c6e0f107 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -181,6 +181,12 @@  struct msm_gpu_submitqueue {
 	struct kref ref;
 };
 
+struct msm_gpu_state_bo {
+	u64 iova;	/* GPU address of the buffer */
+	size_t size;	/* size of the buffer in bytes */
+	void *data;	/* kvmalloc'd copy of the contents (readable BOs only) */
+};
+
 struct msm_gpu_state {
 	struct kref ref;
 	struct timeval time;
@@ -201,6 +207,9 @@  struct msm_gpu_state {
 
 	char *comm;
 	char *cmd;
+
+	int nr_bos;
+	struct msm_gpu_state_bo *bos;
 };
 
 static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)