diff mbox series

[v4,2/2] drm/panfrost: Add support for devcoredump

Message ID 20220622143616.1265405-3-adrian.larumbe@collabora.com (mailing list archive)
State New, archived
Headers show
Series devcoredump support for Panfrost GPU driver | expand

Commit Message

Adrian Larumbe June 22, 2022, 2:36 p.m. UTC
In the event of a job timeout, debug dump information will be written into
/sys/class/devcoredump.

Inspired by etnaviv's similar feature.

Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/panfrost/Kconfig         |   1 +
 drivers/gpu/drm/panfrost/Makefile        |   3 +-
 drivers/gpu/drm/panfrost/panfrost_dump.c | 249 +++++++++++++++++++++++
 drivers/gpu/drm/panfrost/panfrost_dump.h |  12 ++
 drivers/gpu/drm/panfrost/panfrost_job.c  |   3 +
 include/uapi/drm/panfrost_drm.h          |  47 +++++
 6 files changed, 314 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/panfrost/panfrost_dump.c
 create mode 100644 drivers/gpu/drm/panfrost/panfrost_dump.h

Comments

Steven Price June 23, 2022, 12:23 p.m. UTC | #1
On 22/06/2022 15:36, Adrián Larumbe wrote:
> In the event of a job timeout, debug dump information will be written into
> /sys/class/devcoredump.
> 
> Inspired by etnaviv's similar feature.
> 
> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>

Looks pretty good, a few comments below.

> ---
>  drivers/gpu/drm/panfrost/Kconfig         |   1 +
>  drivers/gpu/drm/panfrost/Makefile        |   3 +-
>  drivers/gpu/drm/panfrost/panfrost_dump.c | 249 +++++++++++++++++++++++
>  drivers/gpu/drm/panfrost/panfrost_dump.h |  12 ++
>  drivers/gpu/drm/panfrost/panfrost_job.c  |   3 +
>  include/uapi/drm/panfrost_drm.h          |  47 +++++
>  6 files changed, 314 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/panfrost/panfrost_dump.c
>  create mode 100644 drivers/gpu/drm/panfrost/panfrost_dump.h
> 
> diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
> index 86cdc0ce79e6..079600328be1 100644
> --- a/drivers/gpu/drm/panfrost/Kconfig
> +++ b/drivers/gpu/drm/panfrost/Kconfig
> @@ -11,6 +11,7 @@ config DRM_PANFROST
>  	select DRM_GEM_SHMEM_HELPER
>  	select PM_DEVFREQ
>  	select DEVFREQ_GOV_SIMPLE_ONDEMAND
> +	select WANT_DEV_COREDUMP
>  	help
>  	  DRM driver for ARM Mali Midgard (T6xx, T7xx, T8xx) and
>  	  Bifrost (G3x, G5x, G7x) GPUs.
> diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile
> index b71935862417..7da2b3f02ed9 100644
> --- a/drivers/gpu/drm/panfrost/Makefile
> +++ b/drivers/gpu/drm/panfrost/Makefile
> @@ -9,6 +9,7 @@ panfrost-y := \
>  	panfrost_gpu.o \
>  	panfrost_job.o \
>  	panfrost_mmu.o \
> -	panfrost_perfcnt.o
> +	panfrost_perfcnt.o \
> +	panfrost_dump.o
>  
>  obj-$(CONFIG_DRM_PANFROST) += panfrost.o
> diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.c b/drivers/gpu/drm/panfrost/panfrost_dump.c
> new file mode 100644
> index 000000000000..a710ed7bcefa
> --- /dev/null
> +++ b/drivers/gpu/drm/panfrost/panfrost_dump.c
> @@ -0,0 +1,249 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright 2021 Collabora ltd. */
> +
> +#include <linux/err.h>
> +#include <linux/device.h>
> +#include <linux/devcoredump.h>
> +#include <linux/moduleparam.h>
> +#include <linux/iosys-map.h>
> +#include <drm/panfrost_drm.h>
> +#include <drm/drm_device.h>
> +
> +#include "panfrost_job.h"
> +#include "panfrost_gem.h"
> +#include "panfrost_regs.h"
> +#include "panfrost_dump.h"
> +#include "panfrost_device.h"
> +
> +static bool panfrost_dump_core = true;
> +module_param_named(dump_core, panfrost_dump_core, bool, 0600);
> +
> +struct panfrost_dump_iterator {
> +	void *start;
> +	struct panfrost_dump_object_header *hdr;
> +	void *data;
> +};
> +
> +static const unsigned short panfrost_dump_registers[] = {
> +	SHADER_READY_LO,
> +	SHADER_READY_HI,
> +	TILER_READY_LO,
> +	TILER_READY_HI,
> +	L2_READY_LO,
> +	L2_READY_HI,
> +	JOB_INT_MASK,
> +	JOB_INT_STAT,
> +	JS_HEAD_LO(0),
> +	JS_HEAD_HI(0),
> +	JS_TAIL_LO(0),
> +	JS_TAIL_HI(0),
> +	JS_AFFINITY_LO(0),
> +	JS_AFFINITY_HI(0),
> +	JS_CONFIG(0),
> +	JS_STATUS(0),
> +	JS_HEAD_NEXT_LO(0),
> +	JS_HEAD_NEXT_HI(0),
> +	JS_AFFINITY_NEXT_LO(0),
> +	JS_AFFINITY_NEXT_HI(0),
> +	JS_CONFIG_NEXT(0),
> +	MMU_INT_MASK,
> +	MMU_INT_STAT,
> +	AS_TRANSTAB_LO(0),
> +	AS_TRANSTAB_HI(0),
> +	AS_MEMATTR_LO(0),
> +	AS_MEMATTR_HI(0),
> +	AS_FAULTSTATUS(0),
> +	AS_FAULTADDRESS_LO(0),
> +	AS_FAULTADDRESS_HI(0),
> +	AS_STATUS(0),
> +};
> +
> +static void panfrost_core_dump_header(struct panfrost_dump_iterator *iter,
> +	u32 type, void *data_end)
> +{
> +	struct panfrost_dump_object_header *hdr = iter->hdr;
> +
> +	hdr->magic = cpu_to_le32(PANFROSTDUMP_MAGIC);
> +	hdr->type = cpu_to_le32(type);
> +	hdr->file_offset = cpu_to_le32(iter->data - iter->start);
> +	hdr->file_size = cpu_to_le32(data_end - iter->data);
> +
> +	iter->hdr++;
> +	iter->data += le32_to_cpu(hdr->file_size);
> +}
> +
> +static void
> +panfrost_core_dump_registers(struct panfrost_dump_iterator *iter,
> +			     struct panfrost_device *pfdev,
> +			     u32 as_nr, int slot)
> +{
> +	struct panfrost_dump_registers *dumpreg = iter->data;
> +	unsigned int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(panfrost_dump_registers); i++, dumpreg++) {
> +		unsigned int js_as_offset = 0;
> +		unsigned int reg;
> +
> +		if (panfrost_dump_registers[i] >= JS_HEAD_LO(0) &&
> +		    panfrost_dump_registers[i] <= JS_CONFIG_NEXT(0))

It would be good to use something more generic than specific registers
in case the list of registers is ever changed. We have JS_BASE already
which would work for the lower end. We seem to be missing the equivalent
MMU_BASE #define currently. The upper end is base+stride, so for JS we have:

	if (panfrost_dump_registers[i] >= JS_BASE &&
	    panfrost_dump_registers[i] < JS_BASE + JS_SLOT_STRIDE)

> +			js_as_offset = slot * JS_SLOT_STRIDE;
> +		else if (panfrost_dump_registers[i] >= AS_TRANSTAB_LO(0) &&
> +			 panfrost_dump_registers[i] <= AS_STATUS(0))
> +			js_as_offset = (as_nr << MMU_AS_SHIFT);
> +
> +		reg = panfrost_dump_registers[i] + js_as_offset;
> +
> +		dumpreg->reg = cpu_to_le32(reg);
> +		dumpreg->value = cpu_to_le32(gpu_read(pfdev, reg));
> +	}
> +
> +	panfrost_core_dump_header(iter, PANFROSTDUMP_BUF_REG, dumpreg);
> +}
> +
> +void panfrost_core_dump(struct panfrost_job *job)
> +{
> +	struct panfrost_device *pfdev = job->pfdev;
> +	struct panfrost_dump_iterator iter;
> +	struct drm_gem_object *dbo;
> +	unsigned int n_obj, n_bomap_pages;
> +	__le64 *bomap, *bomap_start;
> +	size_t file_size;
> +	u32 as_nr;
> +	int slot;
> +	int ret, i;
> +
> +	as_nr = job->mmu->as;
> +	slot = panfrost_job_get_slot(job);
> +
> +	/* Only catch the first event, or when manually re-armed */
> +	if (!panfrost_dump_core)
> +		return;
> +	panfrost_dump_core = false;
> +
> +	/* At least, we dump registers and end marker */
> +	n_obj = 2;
> +	n_bomap_pages = 0;
> +	file_size = ARRAY_SIZE(panfrost_dump_registers) *
> +			sizeof(struct panfrost_dump_registers);
> +
> +	/* Add in the active buffer objects */
> +	for (i = 0; i < job->bo_count; i++) {
> +		/*
> +		 * Even though the CPU could be configured to use 16K or 64K pages, this
> +		 * is a very unusual situation for most kernel setups on SoCs that have
> +		 * a Panfrost device. Also many places across the driver make the somewhat
> +		 * arbitrary assumption that Panfrost's MMU page size is the same as the CPU's,
> +		 * so let's have a sanity check to ensure that's always the case
> +		 */
> +		WARN_ON(!IS_ALIGNED(dbo->size, PAGE_SIZE));

This warn needs to go after the assignment below - I just spent a few
minutes tracking down why I was getting a NULL pointer dereference.
Sadly my GCC doesn't seem to be able to warn about this uninitialised use :(

> +
> +		dbo = job->bos[i];
> +		file_size += dbo->size;
> +		n_bomap_pages += dbo->size >> PAGE_SHIFT;
> +		n_obj++;
> +	}
> +
> +	/* If we have any buffer objects, add a bomap object */
> +	if (n_bomap_pages) {
> +		file_size += n_bomap_pages * sizeof(*bomap);
> +		n_obj++;
> +	}
> +
> +	/* Add the size of the headers */
> +	file_size += sizeof(*iter.hdr) * n_obj;
> +
> +	/*
> +	 * Allocate the file in vmalloc memory, it's likely to be big.
> +	 * The reason behind these GFP flags is that we don't want to trigger the
> +	 * OOM killer in the event that not enough memory could be found for our
> +	 * dump file. We also don't want the allocator to do any error reporting,
> +	 * as the right behaviour is failing gracefully if a big enough buffer
> +	 * could not be allocated.
> +	 */
> +	iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
> +			__GFP_NORETRY);
> +	if (!iter.start) {
> +		dev_warn(pfdev->dev, "failed to allocate devcoredump file\n");
> +		return;
> +	}
> +
> +	/* Point the data member after the headers */
> +	iter.hdr = iter.start;
> +	iter.data = &iter.hdr[n_obj];
> +
> +	memset(iter.hdr, 0, iter.data - iter.start);
> +
> +	/*
> +	 * For now, we write the job identifier in the register dump header,
> +	 * so that we can decode the entire dump later with pandecode
> +	 */
> +	iter.hdr->reghdr.jc = cpu_to_le64(job->jc);
> +	iter.hdr->reghdr.major = cpu_to_le32(PANFROSTDUMP_MAJOR);
> +	iter.hdr->reghdr.minor = cpu_to_le32(PANFROSTDUMP_MINOR);
> +	iter.hdr->reghdr.gpu_id = cpu_to_le32(pfdev->features.id);
> +	iter.hdr->reghdr.nbos = cpu_to_le64(job->bo_count);
> +
> +	panfrost_core_dump_registers(&iter, pfdev, as_nr, slot);
> +
> +	/* Reserve space for the bomap */
> +	if (job->bo_count) {
> +		bomap_start = bomap = iter.data;
> +		memset(bomap, 0, sizeof(*bomap) * n_bomap_pages);
> +		panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BOMAP,
> +					  bomap + n_bomap_pages);
> +	}
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		struct iosys_map map;
> +		struct panfrost_gem_mapping *mapping;
> +		struct panfrost_gem_object *bo;
> +		struct sg_page_iter page_iter;
> +		void *vaddr;
> +
> +		bo = to_panfrost_bo(job->bos[i]);
> +		mapping = job->mappings[i];
> +
> +		if (!bo->base.sgt) {
> +			dev_err(pfdev->dev, "Panfrost Dump: BO has no sgt, cannot dump\n");
> +			iter.hdr->bomap.valid = 0;
> +			goto dump_header;
> +		}
> +
> +		ret = drm_gem_shmem_vmap(&bo->base, &map);
> +		if (ret) {
> +			dev_err(pfdev->dev, "Panfrost Dump: couldn't map Buffer Object\n");
> +			iter.hdr->bomap.valid = 0;
> +			goto dump_header;
> +		}
> +
> +		WARN_ON(!mapping->active);
> +
> +		iter.hdr->bomap.data[0] = cpu_to_le32((bomap - bomap_start));
> +
> +		for_each_sgtable_page(bo->base.sgt, &page_iter, 0) {
> +			struct page *page = sg_page_iter_page(&page_iter);
> +
> +			if (!IS_ERR(page)) {
> +				*bomap++ = cpu_to_le64(page_to_phys(page));
> +			} else {
> +				dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
> +				*bomap++ = ~cpu_to_le64(0);
> +			}
> +		}
> +
> +		iter.hdr->bomap.iova = cpu_to_le64(mapping->mmnode.start << PAGE_SHIFT);
> +
> +		vaddr = map.vaddr;
> +		memcpy(iter.data, vaddr, bo->base.base.size);
> +
> +		drm_gem_shmem_vunmap(&bo->base, &map);
> +
> +		iter.hdr->bomap.valid = cpu_to_le64(1);

'valid' is only 32 bit so this should be cpu_to_le32(1).

Steve

> +
> +dump_header:	panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BO, iter.data +
> +					  bo->base.base.size);
> +	}
> +	panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_TRAILER, iter.data);
> +
> +	dev_coredumpv(pfdev->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
> +}
> diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.h b/drivers/gpu/drm/panfrost/panfrost_dump.h
> new file mode 100644
> index 000000000000..7d9bcefa5346
> --- /dev/null
> +++ b/drivers/gpu/drm/panfrost/panfrost_dump.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright 2021 Collabora ltd.
> + */
> +
> +#ifndef PANFROST_DUMP_H
> +#define PANFROST_DUMP_H
> +
> +struct panfrost_job;
> +void panfrost_core_dump(struct panfrost_job *job);
> +
> +#endif
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 7c4208476fbd..dbc597ab46fb 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -20,6 +20,7 @@
>  #include "panfrost_regs.h"
>  #include "panfrost_gpu.h"
>  #include "panfrost_mmu.h"
> +#include "panfrost_dump.h"
>  
>  #define JOB_TIMEOUT_MS 500
>  
> @@ -727,6 +728,8 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
>  		job_read(pfdev, JS_TAIL_LO(js)),
>  		sched_job);
>  
> +	panfrost_core_dump(job);
> +
>  	atomic_set(&pfdev->reset.pending, 1);
>  	panfrost_reset(pfdev, sched_job);
>  
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index 9e40277d8185..eac87310b348 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -224,6 +224,53 @@ struct drm_panfrost_madvise {
>  	__u32 retained;       /* out, whether backing store still exists */
>  };
>  
> +/* Definitions for coredump decoding in user space */
> +#define PANFROSTDUMP_MAJOR 1
> +#define PANFROSTDUMP_MINOR 0
> +
> +#define PANFROSTDUMP_MAGIC 0x464E4150 /* PANF */
> +
> +#define PANFROSTDUMP_BUF_REG 0
> +#define PANFROSTDUMP_BUF_BOMAP (PANFROSTDUMP_BUF_REG + 1)
> +#define PANFROSTDUMP_BUF_BO (PANFROSTDUMP_BUF_BOMAP + 1)
> +#define PANFROSTDUMP_BUF_TRAILER (PANFROSTDUMP_BUF_BO + 1)
> +
> +struct panfrost_dump_object_header {
> +	__le32 magic;
> +	__le32 type;
> +	__le32 file_size;
> +	__le32 file_offset;
> +
> +	union {
> +		struct pan_reg_hdr {
> +			__le64 jc;
> +			__le32 gpu_id;
> +			__le32 major;
> +			__le32 minor;
> +			__le64 nbos;
> +		} reghdr;
> +
> +		struct pan_bomap_hdr {
> +			__le32 valid;
> +			__le64 iova;
> +			__le32 data[2];
> +		} bomap;
> +
> +		/*
> +		 * Force same size in case we want to expand the header
> +		 * with new fields and also keep it 512-byte aligned
> +		 */
> +
> +		__le32 sizer[496];
> +	};
> +};
> +
> +/* Registers object, an array of these */
> +struct panfrost_dump_registers {
> +	__le32 reg;
> +	__le32 value;
> +};
> +
>  #if defined(__cplusplus)
>  }
>  #endif
Adrian Larumbe July 18, 2022, 4:30 p.m. UTC | #2
Hi Steven, once again thanks for the feedback. I was off for some time and then
busy with other stuff, but I can finally work on a new revision for the patch

On 23.06.2022 13:23, Steven Price wrote:
> On 22/06/2022 15:36, Adrián Larumbe wrote:
> > In the event of a job timeout, debug dump information will be written into
> > /sys/class/devcoredump.
> > 
> > Inspired by etnaviv's similar feature.
> > 
> > Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> 
> Looks pretty good, a few comments below.

> > +static void
> > +panfrost_core_dump_registers(struct panfrost_dump_iterator *iter,
> > +			     struct panfrost_device *pfdev,
> > +			     u32 as_nr, int slot)
> > +{
> > +	struct panfrost_dump_registers *dumpreg = iter->data;
> > +	unsigned int i;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(panfrost_dump_registers); i++, dumpreg++) {
> > +		unsigned int js_as_offset = 0;
> > +		unsigned int reg;
> > +
> > +		if (panfrost_dump_registers[i] >= JS_HEAD_LO(0) &&
> > +		    panfrost_dump_registers[i] <= JS_CONFIG_NEXT(0))
> 
> It would be good to use something more generic than specific registers
> in case the list of registers is ever changed. We have JS_BASE already
> which would work for the lower end. We seem to be missing the equivalent
> MMU_BASE #define currently. The upper end is base+stride, so for JS we have:

I defined a MMU_AS_STRIDE in the same file, but I feel a bit weird about this
naming because, unlike for the job slot registers, the stride isn't linear.
However for the sake of clarity I think it should be alright.

> > 	if (panfrost_dump_registers[i] >= JS_BASE &&
> > 	    panfrost_dump_registers[i] < JS_BASE + JS_SLOT_STRIDE)
> > +			js_as_offset = slot * JS_SLOT_STRIDE;
> > +		else if (panfrost_dump_registers[i] >= AS_TRANSTAB_LO(0) &&
> > +			 panfrost_dump_registers[i] <= AS_STATUS(0))
> > +			js_as_offset = (as_nr << MMU_AS_SHIFT);
> > +
> > +		reg = panfrost_dump_registers[i] + js_as_offset;
> > +
> > +		dumpreg->reg = cpu_to_le32(reg);
> > +		dumpreg->value = cpu_to_le32(gpu_read(pfdev, reg));
> > +	}
> > +
> > +	panfrost_core_dump_header(iter, PANFROSTDUMP_BUF_REG, dumpreg);
> > +}
> > +
> > +void panfrost_core_dump(struct panfrost_job *job)
> > +{
> > +	struct panfrost_device *pfdev = job->pfdev;
> > +	struct panfrost_dump_iterator iter;
> > +	struct drm_gem_object *dbo;
> > +	unsigned int n_obj, n_bomap_pages;
> > +	__le64 *bomap, *bomap_start;
> > +	size_t file_size;
> > +	u32 as_nr;
> > +	int slot;
> > +	int ret, i;
> > +
> > +	as_nr = job->mmu->as;
> > +	slot = panfrost_job_get_slot(job);
> > +
> > +	/* Only catch the first event, or when manually re-armed */
> > +	if (!panfrost_dump_core)
> > +		return;
> > +	panfrost_dump_core = false;
> > +
> > +	/* At least, we dump registers and end marker */
> > +	n_obj = 2;
> > +	n_bomap_pages = 0;
> > +	file_size = ARRAY_SIZE(panfrost_dump_registers) *
> > +			sizeof(struct panfrost_dump_registers);
> > +
> > +	/* Add in the active buffer objects */
> > +	for (i = 0; i < job->bo_count; i++) {
> > +		/*
> > +		 * Even though the CPU could be configured to use 16K or 64K pages, this
> > +		 * is a very unusual situation for most kernel setups on SoCs that have
> > +		 * a Panfrost device. Also many places across the driver make the somewhat
> > +		 * arbitrary assumption that Panfrost's MMU page size is the same as the CPU's,
> > +		 * so let's have a sanity check to ensure that's always the case
> > +		 */
> > +		WARN_ON(!IS_ALIGNED(dbo->size, PAGE_SIZE));
> 
> This warn needs to go after the assignment below - I just spent a few
> minutes tracking down why I was getting a NULL pointer dereference.
> Sadly my GCC doesn't seem to be able to warn about this uninitialised use :(

I just triggered the warning because of an uninitialised garbage value. I suppose
last time I tried to rush the latest iteration of the patch without all the due
testing beforehand (won't happen again).

> > +
> > +		dbo = job->bos[i];
> > +		file_size += dbo->size;
> > +		n_bomap_pages += dbo->size >> PAGE_SHIFT;
> > +		n_obj++;
> > +	}

> > +	/* If we have any buffer objects, add a bomap object */
> > +	if (n_bomap_pages) {
> > +		file_size += n_bomap_pages * sizeof(*bomap);
> > +		n_obj++;
> > +	}
> > +
> > +	/* Add the size of the headers */
> > +	file_size += sizeof(*iter.hdr) * n_obj;
> > +
> > +	/*
> > +	 * Allocate the file in vmalloc memory, it's likely to be big.
> > +	 * The reason behind these GFP flags is that we don't want to trigger the
> > +	 * OOM killer in the event that not enough memory could be found for our
> > +	 * dump file. We also don't want the allocator to do any error reporting,
> > +	 * as the right behaviour is failing gracefully if a big enough buffer
> > +	 * could not be allocated.
> > +	 */
> > +	iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
> > +			__GFP_NORETRY);
> > +	if (!iter.start) {
> > +		dev_warn(pfdev->dev, "failed to allocate devcoredump file\n");
> > +		return;
> > +	}
> > +
> > +	/* Point the data member after the headers */
> > +	iter.hdr = iter.start;
> > +	iter.data = &iter.hdr[n_obj];
> > +
> > +	memset(iter.hdr, 0, iter.data - iter.start);
> > +
> > +	/*
> > +	 * For now, we write the job identifier in the register dump header,
> > +	 * so that we can decode the entire dump later with pandecode
> > +	 */
> > +	iter.hdr->reghdr.jc = cpu_to_le64(job->jc);
> > +	iter.hdr->reghdr.major = cpu_to_le32(PANFROSTDUMP_MAJOR);
> > +	iter.hdr->reghdr.minor = cpu_to_le32(PANFROSTDUMP_MINOR);
> > +	iter.hdr->reghdr.gpu_id = cpu_to_le32(pfdev->features.id);
> > +	iter.hdr->reghdr.nbos = cpu_to_le64(job->bo_count);
> > +
> > +	panfrost_core_dump_registers(&iter, pfdev, as_nr, slot);
> > +
> > +	/* Reserve space for the bomap */
> > +	if (job->bo_count) {
> > +		bomap_start = bomap = iter.data;
> > +		memset(bomap, 0, sizeof(*bomap) * n_bomap_pages);
> > +		panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BOMAP,
> > +					  bomap + n_bomap_pages);
> > +	}
> > +
> > +	for (i = 0; i < job->bo_count; i++) {
> > +		struct iosys_map map;
> > +		struct panfrost_gem_mapping *mapping;
> > +		struct panfrost_gem_object *bo;
> > +		struct sg_page_iter page_iter;
> > +		void *vaddr;
> > +
> > +		bo = to_panfrost_bo(job->bos[i]);
> > +		mapping = job->mappings[i];
> > +
> > +		if (!bo->base.sgt) {
> > +			dev_err(pfdev->dev, "Panfrost Dump: BO has no sgt, cannot dump\n");
> > +			iter.hdr->bomap.valid = 0;
> > +			goto dump_header;
> > +		}
> > +
> > +		ret = drm_gem_shmem_vmap(&bo->base, &map);
> > +		if (ret) {
> > +			dev_err(pfdev->dev, "Panfrost Dump: couldn't map Buffer Object\n");
> > +			iter.hdr->bomap.valid = 0;
> > +			goto dump_header;
> > +		}
> > +
> > +		WARN_ON(!mapping->active);
> > +
> > +		iter.hdr->bomap.data[0] = cpu_to_le32((bomap - bomap_start));
> > +
> > +		for_each_sgtable_page(bo->base.sgt, &page_iter, 0) {
> > +			struct page *page = sg_page_iter_page(&page_iter);
> > +
> > +			if (!IS_ERR(page)) {
> > +				*bomap++ = cpu_to_le64(page_to_phys(page));
> > +			} else {
> > +				dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
> > +				*bomap++ = ~cpu_to_le64(0);
> > +			}
> > +		}
> > +
> > +		iter.hdr->bomap.iova = cpu_to_le64(mapping->mmnode.start << PAGE_SHIFT);
> > +
> > +		vaddr = map.vaddr;
> > +		memcpy(iter.data, vaddr, bo->base.base.size);
> > +
> > +		drm_gem_shmem_vunmap(&bo->base, &map);
> > +
> > +		iter.hdr->bomap.valid = cpu_to_le64(1);
> 
> 'valid' is only 32 bit so this should be cpu_to_le32(1).

  Fixed.

> Steve

Adrian
diff mbox series

Patch

diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
index 86cdc0ce79e6..079600328be1 100644
--- a/drivers/gpu/drm/panfrost/Kconfig
+++ b/drivers/gpu/drm/panfrost/Kconfig
@@ -11,6 +11,7 @@  config DRM_PANFROST
 	select DRM_GEM_SHMEM_HELPER
 	select PM_DEVFREQ
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select WANT_DEV_COREDUMP
 	help
 	  DRM driver for ARM Mali Midgard (T6xx, T7xx, T8xx) and
 	  Bifrost (G3x, G5x, G7x) GPUs.
diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile
index b71935862417..7da2b3f02ed9 100644
--- a/drivers/gpu/drm/panfrost/Makefile
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -9,6 +9,7 @@  panfrost-y := \
 	panfrost_gpu.o \
 	panfrost_job.o \
 	panfrost_mmu.o \
-	panfrost_perfcnt.o
+	panfrost_perfcnt.o \
+	panfrost_dump.o
 
 obj-$(CONFIG_DRM_PANFROST) += panfrost.o
diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.c b/drivers/gpu/drm/panfrost/panfrost_dump.c
new file mode 100644
index 000000000000..a710ed7bcefa
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dump.c
@@ -0,0 +1,249 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2021 Collabora ltd. */
+
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/devcoredump.h>
+#include <linux/moduleparam.h>
+#include <linux/iosys-map.h>
+#include <drm/panfrost_drm.h>
+#include <drm/drm_device.h>
+
+#include "panfrost_job.h"
+#include "panfrost_gem.h"
+#include "panfrost_regs.h"
+#include "panfrost_dump.h"
+#include "panfrost_device.h"
+
+static bool panfrost_dump_core = true;
+module_param_named(dump_core, panfrost_dump_core, bool, 0600);
+
+struct panfrost_dump_iterator {
+	void *start;
+	struct panfrost_dump_object_header *hdr;
+	void *data;
+};
+
+static const unsigned short panfrost_dump_registers[] = {
+	SHADER_READY_LO,
+	SHADER_READY_HI,
+	TILER_READY_LO,
+	TILER_READY_HI,
+	L2_READY_LO,
+	L2_READY_HI,
+	JOB_INT_MASK,
+	JOB_INT_STAT,
+	JS_HEAD_LO(0),
+	JS_HEAD_HI(0),
+	JS_TAIL_LO(0),
+	JS_TAIL_HI(0),
+	JS_AFFINITY_LO(0),
+	JS_AFFINITY_HI(0),
+	JS_CONFIG(0),
+	JS_STATUS(0),
+	JS_HEAD_NEXT_LO(0),
+	JS_HEAD_NEXT_HI(0),
+	JS_AFFINITY_NEXT_LO(0),
+	JS_AFFINITY_NEXT_HI(0),
+	JS_CONFIG_NEXT(0),
+	MMU_INT_MASK,
+	MMU_INT_STAT,
+	AS_TRANSTAB_LO(0),
+	AS_TRANSTAB_HI(0),
+	AS_MEMATTR_LO(0),
+	AS_MEMATTR_HI(0),
+	AS_FAULTSTATUS(0),
+	AS_FAULTADDRESS_LO(0),
+	AS_FAULTADDRESS_HI(0),
+	AS_STATUS(0),
+};
+
+static void panfrost_core_dump_header(struct panfrost_dump_iterator *iter,
+	u32 type, void *data_end)
+{
+	struct panfrost_dump_object_header *hdr = iter->hdr;
+
+	hdr->magic = cpu_to_le32(PANFROSTDUMP_MAGIC);
+	hdr->type = cpu_to_le32(type);
+	hdr->file_offset = cpu_to_le32(iter->data - iter->start);
+	hdr->file_size = cpu_to_le32(data_end - iter->data);
+
+	iter->hdr++;
+	iter->data += le32_to_cpu(hdr->file_size);
+}
+
+static void
+panfrost_core_dump_registers(struct panfrost_dump_iterator *iter,
+			     struct panfrost_device *pfdev,
+			     u32 as_nr, int slot)
+{
+	struct panfrost_dump_registers *dumpreg = iter->data;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(panfrost_dump_registers); i++, dumpreg++) {
+		unsigned int js_as_offset = 0;
+		unsigned int reg;
+
+		if (panfrost_dump_registers[i] >= JS_HEAD_LO(0) &&
+		    panfrost_dump_registers[i] <= JS_CONFIG_NEXT(0))
+			js_as_offset = slot * JS_SLOT_STRIDE;
+		else if (panfrost_dump_registers[i] >= AS_TRANSTAB_LO(0) &&
+			 panfrost_dump_registers[i] <= AS_STATUS(0))
+			js_as_offset = (as_nr << MMU_AS_SHIFT);
+
+		reg = panfrost_dump_registers[i] + js_as_offset;
+
+		dumpreg->reg = cpu_to_le32(reg);
+		dumpreg->value = cpu_to_le32(gpu_read(pfdev, reg));
+	}
+
+	panfrost_core_dump_header(iter, PANFROSTDUMP_BUF_REG, dumpreg);
+}
+
+void panfrost_core_dump(struct panfrost_job *job)
+{
+	struct panfrost_device *pfdev = job->pfdev;
+	struct panfrost_dump_iterator iter;
+	struct drm_gem_object *dbo;
+	unsigned int n_obj, n_bomap_pages;
+	__le64 *bomap, *bomap_start;
+	size_t file_size;
+	u32 as_nr;
+	int slot;
+	int ret, i;
+
+	as_nr = job->mmu->as;
+	slot = panfrost_job_get_slot(job);
+
+	/* Only catch the first event, or when manually re-armed */
+	if (!panfrost_dump_core)
+		return;
+	panfrost_dump_core = false;
+
+	/* At least, we dump registers and end marker */
+	n_obj = 2;
+	n_bomap_pages = 0;
+	file_size = ARRAY_SIZE(panfrost_dump_registers) *
+			sizeof(struct panfrost_dump_registers);
+
+	/* Add in the active buffer objects */
+	for (i = 0; i < job->bo_count; i++) {
+		/*
+		 * Even though the CPU could be configured to use 16K or 64K pages, this
+		 * is a very unusual situation for most kernel setups on SoCs that have
+		 * a Panfrost device. Also many places across the driver make the somewhat
+		 * arbitrary assumption that Panfrost's MMU page size is the same as the CPU's,
+		 * so let's have a sanity check to ensure that's always the case
+		 */
+		WARN_ON(!IS_ALIGNED(dbo->size, PAGE_SIZE));
+
+		dbo = job->bos[i];
+		file_size += dbo->size;
+		n_bomap_pages += dbo->size >> PAGE_SHIFT;
+		n_obj++;
+	}
+
+	/* If we have any buffer objects, add a bomap object */
+	if (n_bomap_pages) {
+		file_size += n_bomap_pages * sizeof(*bomap);
+		n_obj++;
+	}
+
+	/* Add the size of the headers */
+	file_size += sizeof(*iter.hdr) * n_obj;
+
+	/*
+	 * Allocate the file in vmalloc memory, it's likely to be big.
+	 * The reason behind these GFP flags is that we don't want to trigger the
+	 * OOM killer in the event that not enough memory could be found for our
+	 * dump file. We also don't want the allocator to do any error reporting,
+	 * as the right behaviour is failing gracefully if a big enough buffer
+	 * could not be allocated.
+	 */
+	iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN |
+			__GFP_NORETRY);
+	if (!iter.start) {
+		dev_warn(pfdev->dev, "failed to allocate devcoredump file\n");
+		return;
+	}
+
+	/* Point the data member after the headers */
+	iter.hdr = iter.start;
+	iter.data = &iter.hdr[n_obj];
+
+	memset(iter.hdr, 0, iter.data - iter.start);
+
+	/*
+	 * For now, we write the job identifier in the register dump header,
+	 * so that we can decode the entire dump later with pandecode
+	 */
+	iter.hdr->reghdr.jc = cpu_to_le64(job->jc);
+	iter.hdr->reghdr.major = cpu_to_le32(PANFROSTDUMP_MAJOR);
+	iter.hdr->reghdr.minor = cpu_to_le32(PANFROSTDUMP_MINOR);
+	iter.hdr->reghdr.gpu_id = cpu_to_le32(pfdev->features.id);
+	iter.hdr->reghdr.nbos = cpu_to_le64(job->bo_count);
+
+	panfrost_core_dump_registers(&iter, pfdev, as_nr, slot);
+
+	/* Reserve space for the bomap */
+	if (job->bo_count) {
+		bomap_start = bomap = iter.data;
+		memset(bomap, 0, sizeof(*bomap) * n_bomap_pages);
+		panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BOMAP,
+					  bomap + n_bomap_pages);
+	}
+
+	for (i = 0; i < job->bo_count; i++) {
+		struct iosys_map map;
+		struct panfrost_gem_mapping *mapping;
+		struct panfrost_gem_object *bo;
+		struct sg_page_iter page_iter;
+		void *vaddr;
+
+		bo = to_panfrost_bo(job->bos[i]);
+		mapping = job->mappings[i];
+
+		if (!bo->base.sgt) {
+			dev_err(pfdev->dev, "Panfrost Dump: BO has no sgt, cannot dump\n");
+			iter.hdr->bomap.valid = 0;
+			goto dump_header;
+		}
+
+		ret = drm_gem_shmem_vmap(&bo->base, &map);
+		if (ret) {
+			dev_err(pfdev->dev, "Panfrost Dump: couldn't map Buffer Object\n");
+			iter.hdr->bomap.valid = 0;
+			goto dump_header;
+		}
+
+		WARN_ON(!mapping->active);
+
+		iter.hdr->bomap.data[0] = cpu_to_le32((bomap - bomap_start));
+
+		for_each_sgtable_page(bo->base.sgt, &page_iter, 0) {
+			struct page *page = sg_page_iter_page(&page_iter);
+
+			if (!IS_ERR(page)) {
+				*bomap++ = cpu_to_le64(page_to_phys(page));
+			} else {
+				dev_err(pfdev->dev, "Panfrost Dump: wrong page\n");
+				*bomap++ = ~cpu_to_le64(0);
+			}
+		}
+
+		iter.hdr->bomap.iova = cpu_to_le64(mapping->mmnode.start << PAGE_SHIFT);
+
+		vaddr = map.vaddr;
+		memcpy(iter.data, vaddr, bo->base.base.size);
+
+		drm_gem_shmem_vunmap(&bo->base, &map);
+
+		iter.hdr->bomap.valid = cpu_to_le64(1);
+
+dump_header:	panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_BO, iter.data +
+					  bo->base.base.size);
+	}
+	panfrost_core_dump_header(&iter, PANFROSTDUMP_BUF_TRAILER, iter.data);
+
+	dev_coredumpv(pfdev->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_dump.h b/drivers/gpu/drm/panfrost/panfrost_dump.h
new file mode 100644
index 000000000000..7d9bcefa5346
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dump.h
@@ -0,0 +1,12 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2021 Collabora ltd.
+ */
+
+#ifndef PANFROST_DUMP_H
+#define PANFROST_DUMP_H
+
+struct panfrost_job;
+void panfrost_core_dump(struct panfrost_job *job);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 7c4208476fbd..dbc597ab46fb 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -20,6 +20,7 @@ 
 #include "panfrost_regs.h"
 #include "panfrost_gpu.h"
 #include "panfrost_mmu.h"
+#include "panfrost_dump.h"
 
 #define JOB_TIMEOUT_MS 500
 
@@ -727,6 +728,8 @@  static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
 		job_read(pfdev, JS_TAIL_LO(js)),
 		sched_job);
 
+	panfrost_core_dump(job);
+
 	atomic_set(&pfdev->reset.pending, 1);
 	panfrost_reset(pfdev, sched_job);
 
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index 9e40277d8185..eac87310b348 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -224,6 +224,53 @@  struct drm_panfrost_madvise {
 	__u32 retained;       /* out, whether backing store still exists */
 };
 
+/* Definitions for coredump decoding in user space */
+#define PANFROSTDUMP_MAJOR 1
+#define PANFROSTDUMP_MINOR 0
+
+#define PANFROSTDUMP_MAGIC 0x464E4150 /* PANF */
+
+#define PANFROSTDUMP_BUF_REG 0
+#define PANFROSTDUMP_BUF_BOMAP (PANFROSTDUMP_BUF_REG + 1)
+#define PANFROSTDUMP_BUF_BO (PANFROSTDUMP_BUF_BOMAP + 1)
+#define PANFROSTDUMP_BUF_TRAILER (PANFROSTDUMP_BUF_BO + 1)
+
+struct panfrost_dump_object_header {
+	__le32 magic;
+	__le32 type;
+	__le32 file_size;
+	__le32 file_offset;
+
+	union {
+		struct pan_reg_hdr {
+			__le64 jc;
+			__le32 gpu_id;
+			__le32 major;
+			__le32 minor;
+			__le64 nbos;
+		} reghdr;
+
+		struct pan_bomap_hdr {
+			__le32 valid;
+			__le64 iova;
+			__le32 data[2];
+		} bomap;
+
+		/*
+		 * Force same size in case we want to expand the header
+		 * with new fields and also keep it 512-byte aligned
+		 */
+
+		__le32 sizer[496];
+	};
+};
+
+/* Registers object, an array of these */
+struct panfrost_dump_registers {
+	__le32 reg;
+	__le32 value;
+};
+
 #if defined(__cplusplus)
 }
 #endif