[13/13] drm/i915: Compress GPU objects in error state

Message ID	1470387964-3363-14-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Fri, 5 Aug 2016 10:06:04 +0100 Message-Id: <1470387964-3363-14-git-send-email-chris@chris-wilson.co.uk> In-Reply-To: <1470387964-3363-1-git-send-email-chris@chris-wilson.co.uk> References: <1470387964-3363-1-git-send-email-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH 13/13] drm/i915: Compress GPU objects in error state Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Message ID

1470387964-3363-14-git-send-email-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Fri,  5 Aug 2016 10:06:04 +0100
Message-Id: <1470387964-3363-14-git-send-email-chris@chris-wilson.co.uk>
In-Reply-To: <1470387964-3363-1-git-send-email-chris@chris-wilson.co.uk>
References: <1470387964-3363-1-git-send-email-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 13/13] drm/i915: Compress GPU objects in error
	state
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Commit Message

Chris Wilson Aug. 5, 2016, 9:06 a.m. UTC

Our error states are quickly growing, pinning kernel memory with them.
The majority of the space is taken up by the error objects. These
compress well using zlib and without decode are mostly meaningless, so
encoding them does not hinder quickly parsing the error state for
familiarity.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Kconfig          |   1 +
 drivers/gpu/drm/i915/i915_drv.h       |   4 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  10 ++
 drivers/gpu/drm/i915/i915_gem_gtt.h   |   2 +
 drivers/gpu/drm/i915/i915_gpu_error.c | 271 +++++++++++++++++-----------------
 5 files changed, 148 insertions(+), 140 deletions(-)

Comments

Daniel Vetter Aug. 5, 2016, 6:56 p.m. UTC | #1

On Fri, Aug 05, 2016 at 10:06:04AM +0100, Chris Wilson wrote:
> Our error states are quickly growing, pinning kernel memory with them.
> The majority of the space is taken up by the error objects. These
> compress well using zlib and without decode are mostly meaningless, so
> encoding them does not hinder quickly parsing the error state for
> familiarity.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Seems to also contain a wholesale rework of the capture logic using a ggtt
mappable entry. Would explain why the missing clflush isn't an issue for
you. Imo best if that part is reordered as the first patch (or at least
before stop_machine, which requires the removal of cflush), and then the
zlib on top.

On the idea itself, since I have no clue: How do we uncompress these
again? Patched intel_error_decode, or can zlib deal with in-line streams?
-Daniel

> ---
>  drivers/gpu/drm/i915/Kconfig          |   1 +
>  drivers/gpu/drm/i915/i915_drv.h       |   4 +-
>  drivers/gpu/drm/i915/i915_gem_gtt.c   |  10 ++
>  drivers/gpu/drm/i915/i915_gem_gtt.h   |   2 +
>  drivers/gpu/drm/i915/i915_gpu_error.c | 271 +++++++++++++++++-----------------
>  5 files changed, 148 insertions(+), 140 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 7badcee88ebf..c8ea20526aef 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -5,6 +5,7 @@ config DRM_I915
>  	select INTEL_GTT
>  	select INTERVAL_TREE
>  	select STOP_MACHINE
> +	select ZLIB_DEFLATE
>  	# we need shmfs for the swappable backing store, and in particular
>  	# the shmem_readpage() which depends upon tmpfs
>  	select SHMEM
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 370a4c9eea70..edc1e6d6be0d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -534,6 +534,7 @@ struct drm_i915_error_state {
>  		u32 tail;
>  		u32 head;
>  		u32 ctl;
> +		u32 mode;
>  		u32 hws;
>  		u32 ipeir;
>  		u32 ipehr;
> @@ -550,9 +551,10 @@ struct drm_i915_error_state {
>  		u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
>  
>  		struct drm_i915_error_object {
> -			int page_count;
>  			u64 gtt_offset;
>  			u64 gtt_size;
> +			int page_count;
> +			int unused;
>  			u32 *pages[0];
>  		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7c9f654d515a..c45e7f456cea 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2737,6 +2737,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
>  	if (ret)
>  		return ret;
>  
> +	/* Reserve a mappable slot for our lockless error capture */
> +	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
> +						  &ggtt->gpu_error,
> +						  4096, 0, -1,
> +						  0, ggtt->mappable_end,
> +						  0, 0);
> +	if (ret)
> +		return ret;
> +
>  	/* Clear any non-preallocated blocks */
>  	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
>  		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> @@ -2804,6 +2813,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
>  	if (drm_mm_initialized(&ggtt->base.mm)) {
>  		intel_vgt_deballoon(dev_priv);
>  
> +		drm_mm_remove_node(&ggtt->gpu_error);
>  		drm_mm_takedown(&ggtt->base.mm);
>  		list_del(&ggtt->base.global_link);
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index c54ae2323df3..010e10c0b62b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -428,6 +428,8 @@ struct i915_ggtt {
>  	bool do_idle_maps;
>  
>  	int mtrr;
> +
> +	struct drm_mm_node gpu_error;
>  };
>  
>  struct i915_hw_ppgtt {
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 6078f47d4bc0..f036584e55e3 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -29,6 +29,7 @@
>  
>  #include <generated/utsrelease.h>
>  #include <linux/stop_machine.h>
> +#include <linux/zlib.h>
>  #include "i915_drv.h"
>  
>  static const char *engine_str(int engine)
> @@ -237,6 +238,8 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
>  	err_printf(m, "  HEAD:  0x%08x\n", ee->head);
>  	err_printf(m, "  TAIL:  0x%08x\n", ee->tail);
>  	err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
> +	err_printf(m, "  MODE:  0x%08x [idle? %d]\n",
> +		   ee->mode, !!(ee->mode & MODE_IDLE));
>  	err_printf(m, "  HWS:   0x%08x\n", ee->hws);
>  	err_printf(m, "  ACTHD: 0x%08x %08x\n",
>  		   (u32)(ee->acthd>>32), (u32)ee->acthd);
> @@ -307,18 +310,46 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
>  	va_end(args);
>  }
>  
> +static bool
> +ascii85_encode(u32 in, char *out)
> +{
> +	int i;
> +
> +	if (in == 0)
> +		return false;
> +
> +	out[5] = '\0';
> +	for (i = 5; i--; ) {
> +		out[i] = '!' + in % 85;
> +		in /= 85;
> +	}
> +
> +	return true;
> +}
> +
>  static void print_error_obj(struct drm_i915_error_state_buf *m,
>  			    struct drm_i915_error_object *obj)
>  {
> -	int page, offset, elt;
> +	char out[6];
> +	int page;
> +
> +	err_puts(m, ":"); /* indicate compressed data */
> +	for (page = 0; page < obj->page_count; page++) {
> +		int i, len;
> +
> +		len = PAGE_SIZE;
> +		if (page == obj->page_count - 1)
> +			len -= obj->unused;
> +		len = (len + 3) / 4;
>  
> -	for (page = offset = 0; page < obj->page_count; page++) {
> -		for (elt = 0; elt < PAGE_SIZE/4; elt++) {
> -			err_printf(m, "%08x :  %08x\n", offset,
> -				   obj->pages[page][elt]);
> -			offset += 4;
> +		for (i = 0; i < len; i++) {
> +			if (ascii85_encode(obj->pages[page][i], out))
> +				err_puts(m, out);
> +			else
> +				err_puts(m, "z");
>  		}
>  	}
> +	err_puts(m, "\n");
>  }
>  
>  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
> @@ -328,8 +359,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct drm_i915_error_state *error = error_priv->error;
>  	struct drm_i915_error_object *obj;
> -	int i, j, offset, elt;
>  	int max_hangcheck_score;
> +	int i, j;
>  
>  	if (!error) {
>  		err_printf(m, "no error state collected\n");
> @@ -481,75 +512,33 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  		}
>  
>  		if ((obj = ee->ringbuffer)) {
> -			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
> -				   dev_priv->engine[i].name,
> -				   lower_32_bits(obj->gtt_offset));
> +			err_printf(m, "%s --- ringbuffer = 0x%08llx\n",
> +				   dev_priv->engine[i].name, obj->gtt_offset);
>  			print_error_obj(m, obj);
>  		}
>  
> -		if ((obj = ee->hws_page)) {
> -			u64 hws_offset = obj->gtt_offset;
> -			u32 *hws_page = &obj->pages[0][0];
> -
> -			if (i915.enable_execlists) {
> -				hws_offset += LRC_PPHWSP_PN * PAGE_SIZE;
> -				hws_page = &obj->pages[LRC_PPHWSP_PN][0];
> -			}
> -			err_printf(m, "%s --- HW Status = 0x%08llx\n",
> -				   dev_priv->engine[i].name, hws_offset);
> -			offset = 0;
> -			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
> -				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> -					   offset,
> -					   hws_page[elt],
> -					   hws_page[elt+1],
> -					   hws_page[elt+2],
> -					   hws_page[elt+3]);
> -				offset += 16;
> -			}
> +		if ((obj = ee->wa_ctx)) {
> +			err_printf(m, "%s --- WA Context = 0x%08llx\n",
> +				   dev_priv->engine[i].name, obj->gtt_offset);
> +			print_error_obj(m, obj);
>  		}
>  
> -		obj = ee->wa_ctx;
> -		if (obj) {
> -			u64 wa_ctx_offset = obj->gtt_offset;
> -			u32 *wa_ctx_page = &obj->pages[0][0];
> -			struct intel_engine_cs *engine = &dev_priv->engine[RCS];
> -			u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size +
> -					   engine->wa_ctx.per_ctx.size);
> -
> -			err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n",
> -				   dev_priv->engine[i].name, wa_ctx_offset);
> -			offset = 0;
> -			for (elt = 0; elt < wa_ctx_size; elt += 4) {
> -				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> -					   offset,
> -					   wa_ctx_page[elt + 0],
> -					   wa_ctx_page[elt + 1],
> -					   wa_ctx_page[elt + 2],
> -					   wa_ctx_page[elt + 3]);
> -				offset += 16;
> -			}
> +		if ((obj = ee->hws_page)) {
> +			err_printf(m, "%s --- HW Status = 0x%08llx\n",
> +				   dev_priv->engine[i].name, obj->gtt_offset);
> +			print_error_obj(m, obj);
>  		}
>  
>  		if ((obj = ee->ctx)) {
> -			err_printf(m, "%s --- HW Context = 0x%08x\n",
> -				   dev_priv->engine[i].name,
> -				   lower_32_bits(obj->gtt_offset));
> +			err_printf(m, "%s --- HW Context = 0x%08llx\n",
> +				   dev_priv->engine[i].name, obj->gtt_offset);
>  			print_error_obj(m, obj);
>  		}
>  	}
>  
>  	if ((obj = error->semaphore_obj)) {
> -		err_printf(m, "Semaphore page = 0x%08x\n",
> -			   lower_32_bits(obj->gtt_offset));
> -		for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
> -			err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> -				   elt * 4,
> -				   obj->pages[0][elt],
> -				   obj->pages[0][elt+1],
> -				   obj->pages[0][elt+2],
> -				   obj->pages[0][elt+3]);
> -		}
> +		err_printf(m, "Semaphore page = 0x%08llx\n", obj->gtt_offset);
> +		print_error_obj(m, obj);
>  	}
>  
>  	if (error->overlay)
> @@ -605,7 +594,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
>  		return;
>  
>  	for (page = 0; page < obj->page_count; page++)
> -		kfree(obj->pages[page]);
> +		free_page((unsigned long)obj->pages[page]);
>  
>  	kfree(obj);
>  }
> @@ -641,98 +630,107 @@ static void i915_error_state_free(struct kref *error_ref)
>  	kfree(error);
>  }
>  
> +static int compress_page(struct z_stream_s *zstream,
> +			 void *src,
> +			 struct drm_i915_error_object *dst)
> +{
> +	zstream->next_in = src;
> +	zstream->avail_in = PAGE_SIZE;
> +
> +	do {
> +		if (zstream->avail_out == 0) {
> +			unsigned long page;
> +
> +			page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
> +			if (!page)
> +				return -ENOMEM;
> +
> +			dst->pages[dst->page_count++] = (void *)page;
> +
> +			zstream->next_out = (void *)page;
> +			zstream->avail_out = PAGE_SIZE;
> +		}
> +
> +		if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
> +			return -EIO;
> +
> +#if 0
> +		if (zstream->total_out > zstream->total_in)
> +			return -E2BIG;
> +#endif
> +	} while (zstream->avail_in);
> +
> +	return 0;
> +}
> +
>  static struct drm_i915_error_object *
> -i915_error_object_create(struct drm_i915_private *dev_priv,
> +i915_error_object_create(struct drm_i915_private *i915,
>  			 struct i915_vma *vma)
>  {
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -	struct drm_i915_gem_object *src;
> +	struct i915_ggtt *ggtt = &i915->ggtt;
> +	const u64 slot = ggtt->gpu_error.start;
>  	struct drm_i915_error_object *dst;
> -	int num_pages;
> -	bool use_ggtt;
> -	int i = 0;
> -	u64 reloc_offset;
> +	struct z_stream_s zstream;
> +	unsigned long num_pages;
> +	struct sgt_iter iter;
> +	dma_addr_t dma;
>  
>  	if (!vma)
>  		return NULL;
>  
> -	src = vma->obj;
> -	if (!src->pages)
> -		return NULL;
> -
> -	num_pages = src->base.size >> PAGE_SHIFT;
> -
> -	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
> +	num_pages = vma->size >> PAGE_SHIFT;
> +	num_pages = 10 * num_pages * sizeof(u32 *) >> 3;
> +	dst = kmalloc(sizeof(*dst) + num_pages, GFP_ATOMIC | __GFP_NOWARN);
>  	if (!dst)
>  		return NULL;
>  
>  	dst->gtt_offset = vma->node.start;
> -	dst->gtt_size = vma->node.size;
> +	dst->page_count = 0;
> +	dst->unused = 0;
> +
> +	memset(&zstream, 0, sizeof(zstream));
> +	zstream.workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
> +							       MAX_MEM_LEVEL),
> +				    GFP_ATOMIC | __GFP_NOWARN);
> +	if (!zstream.workspace ||
> +	    zlib_deflateInit(&zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
> +		kfree(zstream.workspace);
> +		kfree(dst);
> +		return NULL;
> +	}
>  
> -	reloc_offset = dst->gtt_offset;
> -	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> -		   (vma->flags & I915_VMA_GLOBAL_BIND) &&
> -		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
> +	for_each_sgt_dma(dma, iter,
> +			 vma->ggtt_view.pages ?: vma->obj->pages) {
> +		int ret;
> +		void *s;
>  
> -	/* Cannot access stolen address directly, try to use the aperture */
> -	if (src->stolen) {
> -		use_ggtt = true;
> +		ggtt->base.insert_page(&ggtt->base, dma, slot,
> +				       I915_CACHE_NONE, 0);
>  
> -		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
> -			goto unwind;
> +		s = (void *__force)
> +			io_mapping_map_atomic_wc(ggtt->mappable, slot);
> +		ret = compress_page(&zstream, s, dst);
> +		io_mapping_unmap_atomic(s);
>  
> -		reloc_offset = vma->node.start;
> -		if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
> +		if (ret)
>  			goto unwind;
>  	}
> +	zlib_deflate(&zstream, Z_FINISH);
> +	dst->unused = zstream.avail_out;
> +out:
> +	zlib_deflateEnd(&zstream);
> +	kfree(zstream.workspace);
>  
> -	/* Cannot access snooped pages through the aperture */
> -	if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
> -	    !HAS_LLC(dev_priv))
> -		goto unwind;
> -
> -	dst->page_count = num_pages;
> -	while (num_pages--) {
> -		void *d;
> -
> -		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
> -		if (d == NULL)
> -			goto unwind;
> -
> -		if (use_ggtt) {
> -			void __iomem *s;
> -
> -			/* Simply ignore tiling or any overlapping fence.
> -			 * It's part of the error state, and this hopefully
> -			 * captures what the GPU read.
> -			 */
> -
> -			s = io_mapping_map_atomic_wc(ggtt->mappable,
> -						     reloc_offset);
> -			memcpy_fromio(d, s, PAGE_SIZE);
> -			io_mapping_unmap_atomic(s);
> -		} else {
> -			struct page *page;
> -			void *s;
> -
> -			page = i915_gem_object_get_page(src, i);
> -
> -			s = kmap_atomic(page);
> -			memcpy(d, s, PAGE_SIZE);
> -			kunmap_atomic(s);
> -		}
> -
> -		dst->pages[i++] = d;
> -		reloc_offset += PAGE_SIZE;
> -	}
> +	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
>  
>  	return dst;
>  
>  unwind:
> -	while (i--)
> -		kfree(dst->pages[i]);
> +	while (dst->page_count--)
> +		free_page((unsigned long)dst->pages[dst->page_count]);
>  	kfree(dst);
> -	return NULL;
> +	dst = NULL;
> +	goto out;
>  }
>  
>  /* The error capture is special as tries to run underneath the normal
> @@ -979,6 +977,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
>  	ee->head = I915_READ_HEAD(engine);
>  	ee->tail = I915_READ_TAIL(engine);
>  	ee->ctl = I915_READ_CTL(engine);
> +	if (INTEL_GEN(dev_priv) > 2)
> +		ee->mode = I915_READ_MODE(engine);
>  
>  	if (I915_NEED_GFX_HWS(dev_priv)) {
>  		i915_reg_t mmio;
> @@ -1367,9 +1367,6 @@ static int capture(void *data)
>  {
>  	struct drm_i915_error_state *error = data;
>  
> -	/* Ensure that what we readback from memory matches what the GPU sees */
> -	wbinvd();
> -
>  	i915_capture_gen_state(error->i915, error);
>  	i915_capture_reg_state(error->i915, error);
>  	i915_gem_record_fences(error->i915, error);
> @@ -1383,9 +1380,6 @@ static int capture(void *data)
>  	error->overlay = intel_overlay_capture_error_state(error->i915);
>  	error->display = intel_display_capture_error_state(error->i915);
>  
> -	/* And make sure we don't leave trash in the CPU cache */
> -	wbinvd();
> -
>  	return 0;
>  }
>  
> @@ -1459,7 +1453,6 @@ void i915_error_state_get(struct drm_device *dev,
>  	if (error_priv->error)
>  		kref_get(&error_priv->error->ref);
>  	spin_unlock_irq(&dev_priv->gpu_error.lock);
> -
>  }
>  
>  void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
> -- 
> 2.8.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Chris Wilson Aug. 5, 2016, 7:15 p.m. UTC | #2

On Fri, Aug 05, 2016 at 08:56:57PM +0200, Daniel Vetter wrote:
> On Fri, Aug 05, 2016 at 10:06:04AM +0100, Chris Wilson wrote:
> > Our error states are quickly growing, pinning kernel memory with them.
> > The majority of the space is taken up by the error objects. These
> > compress well using zlib and without decode are mostly meaningless, so
> > encoding them does not hinder quickly parsing the error state for
> > familiarity.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Seems to also contain a wholesale rework of the capture logic using a ggtt
> mappable entry. Would explain why the missing clflush isn't an issue for
> you.

No. clflush was replaced by wbinvd, which is dropped here because it is
no longer required.

> Imo best if that part is reordered as the first patch (or at least
> before stop_machine, which requires the removal of cflush), and then the
> zlib on top.
> 
> On the idea itself, since I have no clue: How do we uncompress these
> again? Patched intel_error_decode, or can zlib deal with in-line streams?

intel_error_decode was updated to handle the compressed streams.

commit d4c3cd4d04ab7f317d3429708d19cd5fc4d0f5fa
Author:     Chris Wilson <chris@chris-wilson.co.uk>
AuthorDate: Fri Oct 31 11:27:21 2014 +0000
Commit:     Chris Wilson <chris@chris-wilson.co.uk>
CommitDate: Thu Dec 31 22:10:11 2015 +0000

    intel_error_decode: Inflate compressed error state

-Chris

Daniel Vetter Aug. 5, 2016, 7:58 p.m. UTC | #3

On Fri, Aug 05, 2016 at 08:15:42PM +0100, Chris Wilson wrote:
> On Fri, Aug 05, 2016 at 08:56:57PM +0200, Daniel Vetter wrote:
> > On Fri, Aug 05, 2016 at 10:06:04AM +0100, Chris Wilson wrote:
> > > Our error states are quickly growing, pinning kernel memory with them.
> > > The majority of the space is taken up by the error objects. These
> > > compress well using zlib and without decode are mostly meaningless, so
> > > encoding them does not hinder quickly parsing the error state for
> > > familiarity.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > Seems to also contain a wholesale rework of the capture logic using a ggtt
> > mappable entry. Would explain why the missing clflush isn't an issue for
> > you.
> 
> No. clflush was replaced by wbinvd, which is dropped here because it is
> no longer required.

Indeed, I missed that, makes all sense. Still should be split up I think
since it's quite a bit a change.

> > Imo best if that part is reordered as the first patch (or at least
> > before stop_machine, which requires the removal of cflush), and then the
> > zlib on top.
> > 
> > On the idea itself, since I have no clue: How do we uncompress these
> > again? Patched intel_error_decode, or can zlib deal with in-line streams?
> 
> intel_error_decode was updated to handle the compressed streams.
> 
> commit d4c3cd4d04ab7f317d3429708d19cd5fc4d0f5fa
> Author:     Chris Wilson <chris@chris-wilson.co.uk>
> AuthorDate: Fri Oct 31 11:27:21 2014 +0000
> Commit:     Chris Wilson <chris@chris-wilson.co.uk>
> CommitDate: Thu Dec 31 22:10:11 2015 +0000
> 
>     intel_error_decode: Inflate compressed error state

Oh, entirely missed that too ;-) Still would be good to add that to the
commit message.
-Daniel

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7badcee88ebf..c8ea20526aef 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -5,6 +5,7 @@  config DRM_I915
 	select INTEL_GTT
 	select INTERVAL_TREE
 	select STOP_MACHINE
+	select ZLIB_DEFLATE
 	# we need shmfs for the swappable backing store, and in particular
 	# the shmem_readpage() which depends upon tmpfs
 	select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 370a4c9eea70..edc1e6d6be0d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -534,6 +534,7 @@  struct drm_i915_error_state {
 		u32 tail;
 		u32 head;
 		u32 ctl;
+		u32 mode;
 		u32 hws;
 		u32 ipeir;
 		u32 ipehr;
@@ -550,9 +551,10 @@  struct drm_i915_error_state {
 		u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
 
 		struct drm_i915_error_object {
-			int page_count;
 			u64 gtt_offset;
 			u64 gtt_size;
+			int page_count;
+			int unused;
 			u32 *pages[0];
 		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7c9f654d515a..c45e7f456cea 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2737,6 +2737,15 @@  int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	if (ret)
 		return ret;
 
+	/* Reserve a mappable slot for our lockless error capture */
+	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
+						  &ggtt->gpu_error,
+						  4096, 0, -1,
+						  0, ggtt->mappable_end,
+						  0, 0);
+	if (ret)
+		return ret;
+
 	/* Clear any non-preallocated blocks */
 	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
@@ -2804,6 +2813,7 @@  void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 	if (drm_mm_initialized(&ggtt->base.mm)) {
 		intel_vgt_deballoon(dev_priv);
 
+		drm_mm_remove_node(&ggtt->gpu_error);
 		drm_mm_takedown(&ggtt->base.mm);
 		list_del(&ggtt->base.global_link);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c54ae2323df3..010e10c0b62b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -428,6 +428,8 @@  struct i915_ggtt {
 	bool do_idle_maps;
 
 	int mtrr;
+
+	struct drm_mm_node gpu_error;
 };
 
 struct i915_hw_ppgtt {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 6078f47d4bc0..f036584e55e3 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -29,6 +29,7 @@ 
 
 #include <generated/utsrelease.h>
 #include <linux/stop_machine.h>
+#include <linux/zlib.h>
 #include "i915_drv.h"
 
 static const char *engine_str(int engine)
@@ -237,6 +238,8 @@  static void error_print_engine(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  HEAD:  0x%08x\n", ee->head);
 	err_printf(m, "  TAIL:  0x%08x\n", ee->tail);
 	err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
+	err_printf(m, "  MODE:  0x%08x [idle? %d]\n",
+		   ee->mode, !!(ee->mode & MODE_IDLE));
 	err_printf(m, "  HWS:   0x%08x\n", ee->hws);
 	err_printf(m, "  ACTHD: 0x%08x %08x\n",
 		   (u32)(ee->acthd>>32), (u32)ee->acthd);
@@ -307,18 +310,46 @@  void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
 	va_end(args);
 }
 
+static bool
+ascii85_encode(u32 in, char *out)
+{
+	int i;
+
+	if (in == 0)
+		return false;
+
+	out[5] = '\0';
+	for (i = 5; i--; ) {
+		out[i] = '!' + in % 85;
+		in /= 85;
+	}
+
+	return true;
+}
+
 static void print_error_obj(struct drm_i915_error_state_buf *m,
 			    struct drm_i915_error_object *obj)
 {
-	int page, offset, elt;
+	char out[6];
+	int page;
+
+	err_puts(m, ":"); /* indicate compressed data */
+	for (page = 0; page < obj->page_count; page++) {
+		int i, len;
+
+		len = PAGE_SIZE;
+		if (page == obj->page_count - 1)
+			len -= obj->unused;
+		len = (len + 3) / 4;
 
-	for (page = offset = 0; page < obj->page_count; page++) {
-		for (elt = 0; elt < PAGE_SIZE/4; elt++) {
-			err_printf(m, "%08x :  %08x\n", offset,
-				   obj->pages[page][elt]);
-			offset += 4;
+		for (i = 0; i < len; i++) {
+			if (ascii85_encode(obj->pages[page][i], out))
+				err_puts(m, out);
+			else
+				err_puts(m, "z");
 		}
 	}
+	err_puts(m, "\n");
 }
 
 int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
@@ -328,8 +359,8 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_error_state *error = error_priv->error;
 	struct drm_i915_error_object *obj;
-	int i, j, offset, elt;
 	int max_hangcheck_score;
+	int i, j;
 
 	if (!error) {
 		err_printf(m, "no error state collected\n");
@@ -481,75 +512,33 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		}
 
 		if ((obj = ee->ringbuffer)) {
-			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
+			err_printf(m, "%s --- ringbuffer = 0x%08llx\n",
+				   dev_priv->engine[i].name, obj->gtt_offset);
 			print_error_obj(m, obj);
 		}
 
-		if ((obj = ee->hws_page)) {
-			u64 hws_offset = obj->gtt_offset;
-			u32 *hws_page = &obj->pages[0][0];
-
-			if (i915.enable_execlists) {
-				hws_offset += LRC_PPHWSP_PN * PAGE_SIZE;
-				hws_page = &obj->pages[LRC_PPHWSP_PN][0];
-			}
-			err_printf(m, "%s --- HW Status = 0x%08llx\n",
-				   dev_priv->engine[i].name, hws_offset);
-			offset = 0;
-			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   hws_page[elt],
-					   hws_page[elt+1],
-					   hws_page[elt+2],
-					   hws_page[elt+3]);
-				offset += 16;
-			}
+		if ((obj = ee->wa_ctx)) {
+			err_printf(m, "%s --- WA Context = 0x%08llx\n",
+				   dev_priv->engine[i].name, obj->gtt_offset);
+			print_error_obj(m, obj);
 		}
 
-		obj = ee->wa_ctx;
-		if (obj) {
-			u64 wa_ctx_offset = obj->gtt_offset;
-			u32 *wa_ctx_page = &obj->pages[0][0];
-			struct intel_engine_cs *engine = &dev_priv->engine[RCS];
-			u32 wa_ctx_size = (engine->wa_ctx.indirect_ctx.size +
-					   engine->wa_ctx.per_ctx.size);
-
-			err_printf(m, "%s --- WA ctx batch buffer = 0x%08llx\n",
-				   dev_priv->engine[i].name, wa_ctx_offset);
-			offset = 0;
-			for (elt = 0; elt < wa_ctx_size; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   wa_ctx_page[elt + 0],
-					   wa_ctx_page[elt + 1],
-					   wa_ctx_page[elt + 2],
-					   wa_ctx_page[elt + 3]);
-				offset += 16;
-			}
+		if ((obj = ee->hws_page)) {
+			err_printf(m, "%s --- HW Status = 0x%08llx\n",
+				   dev_priv->engine[i].name, obj->gtt_offset);
+			print_error_obj(m, obj);
 		}
 
 		if ((obj = ee->ctx)) {
-			err_printf(m, "%s --- HW Context = 0x%08x\n",
-				   dev_priv->engine[i].name,
-				   lower_32_bits(obj->gtt_offset));
+			err_printf(m, "%s --- HW Context = 0x%08llx\n",
+				   dev_priv->engine[i].name, obj->gtt_offset);
 			print_error_obj(m, obj);
 		}
 	}
 
 	if ((obj = error->semaphore_obj)) {
-		err_printf(m, "Semaphore page = 0x%08x\n",
-			   lower_32_bits(obj->gtt_offset));
-		for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-			err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-				   elt * 4,
-				   obj->pages[0][elt],
-				   obj->pages[0][elt+1],
-				   obj->pages[0][elt+2],
-				   obj->pages[0][elt+3]);
-		}
+		err_printf(m, "Semaphore page = 0x%08llx\n", obj->gtt_offset);
+		print_error_obj(m, obj);
 	}
 
 	if (error->overlay)
@@ -605,7 +594,7 @@  static void i915_error_object_free(struct drm_i915_error_object *obj)
 		return;
 
 	for (page = 0; page < obj->page_count; page++)
-		kfree(obj->pages[page]);
+		free_page((unsigned long)obj->pages[page]);
 
 	kfree(obj);
 }
@@ -641,98 +630,107 @@  static void i915_error_state_free(struct kref *error_ref)
 	kfree(error);
 }
 
+static int compress_page(struct z_stream_s *zstream,
+			 void *src,
+			 struct drm_i915_error_object *dst)
+{
+	zstream->next_in = src;
+	zstream->avail_in = PAGE_SIZE;
+
+	do {
+		if (zstream->avail_out == 0) {
+			unsigned long page;
+
+			page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (!page)
+				return -ENOMEM;
+
+			dst->pages[dst->page_count++] = (void *)page;
+
+			zstream->next_out = (void *)page;
+			zstream->avail_out = PAGE_SIZE;
+		}
+
+		if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)
+			return -EIO;
+
+#if 0
+		if (zstream->total_out > zstream->total_in)
+			return -E2BIG;
+#endif
+	} while (zstream->avail_in);
+
+	return 0;
+}
+
 static struct drm_i915_error_object *
-i915_error_object_create(struct drm_i915_private *dev_priv,
+i915_error_object_create(struct drm_i915_private *i915,
 			 struct i915_vma *vma)
 {
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	struct drm_i915_gem_object *src;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	const u64 slot = ggtt->gpu_error.start;
 	struct drm_i915_error_object *dst;
-	int num_pages;
-	bool use_ggtt;
-	int i = 0;
-	u64 reloc_offset;
+	struct z_stream_s zstream;
+	unsigned long num_pages;
+	struct sgt_iter iter;
+	dma_addr_t dma;
 
 	if (!vma)
 		return NULL;
 
-	src = vma->obj;
-	if (!src->pages)
-		return NULL;
-
-	num_pages = src->base.size >> PAGE_SHIFT;
-
-	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
+	num_pages = vma->size >> PAGE_SHIFT;
+	num_pages = 10 * num_pages * sizeof(u32 *) >> 3;
+	dst = kmalloc(sizeof(*dst) + num_pages, GFP_ATOMIC | __GFP_NOWARN);
 	if (!dst)
 		return NULL;
 
 	dst->gtt_offset = vma->node.start;
-	dst->gtt_size = vma->node.size;
+	dst->page_count = 0;
+	dst->unused = 0;
+
+	memset(&zstream, 0, sizeof(zstream));
+	zstream.workspace = kmalloc(zlib_deflate_workspacesize(MAX_WBITS,
+							       MAX_MEM_LEVEL),
+				    GFP_ATOMIC | __GFP_NOWARN);
+	if (!zstream.workspace ||
+	    zlib_deflateInit(&zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
+		kfree(zstream.workspace);
+		kfree(dst);
+		return NULL;
+	}
 
-	reloc_offset = dst->gtt_offset;
-	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
-		   (vma->flags & I915_VMA_GLOBAL_BIND) &&
-		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
+	for_each_sgt_dma(dma, iter,
+			 vma->ggtt_view.pages ?: vma->obj->pages) {
+		int ret;
+		void *s;
 
-	/* Cannot access stolen address directly, try to use the aperture */
-	if (src->stolen) {
-		use_ggtt = true;
+		ggtt->base.insert_page(&ggtt->base, dma, slot,
+				       I915_CACHE_NONE, 0);
 
-		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
-			goto unwind;
+		s = (void *__force)
+			io_mapping_map_atomic_wc(ggtt->mappable, slot);
+		ret = compress_page(&zstream, s, dst);
+		io_mapping_unmap_atomic(s);
 
-		reloc_offset = vma->node.start;
-		if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
+		if (ret)
 			goto unwind;
 	}
+	zlib_deflate(&zstream, Z_FINISH);
+	dst->unused = zstream.avail_out;
+out:
+	zlib_deflateEnd(&zstream);
+	kfree(zstream.workspace);
 
-	/* Cannot access snooped pages through the aperture */
-	if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
-	    !HAS_LLC(dev_priv))
-		goto unwind;
-
-	dst->page_count = num_pages;
-	while (num_pages--) {
-		void *d;
-
-		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
-		if (d == NULL)
-			goto unwind;
-
-		if (use_ggtt) {
-			void __iomem *s;
-
-			/* Simply ignore tiling or any overlapping fence.
-			 * It's part of the error state, and this hopefully
-			 * captures what the GPU read.
-			 */
-
-			s = io_mapping_map_atomic_wc(ggtt->mappable,
-						     reloc_offset);
-			memcpy_fromio(d, s, PAGE_SIZE);
-			io_mapping_unmap_atomic(s);
-		} else {
-			struct page *page;
-			void *s;
-
-			page = i915_gem_object_get_page(src, i);
-
-			s = kmap_atomic(page);
-			memcpy(d, s, PAGE_SIZE);
-			kunmap_atomic(s);
-		}
-
-		dst->pages[i++] = d;
-		reloc_offset += PAGE_SIZE;
-	}
+	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
 
 	return dst;
 
 unwind:
-	while (i--)
-		kfree(dst->pages[i]);
+	while (dst->page_count--)
+		free_page((unsigned long)dst->pages[dst->page_count]);
 	kfree(dst);
-	return NULL;
+	dst = NULL;
+	goto out;
 }
 
 /* The error capture is special as tries to run underneath the normal
@@ -979,6 +977,8 @@  static void error_record_engine_registers(struct drm_i915_error_state *error,
 	ee->head = I915_READ_HEAD(engine);
 	ee->tail = I915_READ_TAIL(engine);
 	ee->ctl = I915_READ_CTL(engine);
+	if (INTEL_GEN(dev_priv) > 2)
+		ee->mode = I915_READ_MODE(engine);
 
 	if (I915_NEED_GFX_HWS(dev_priv)) {
 		i915_reg_t mmio;
@@ -1367,9 +1367,6 @@  static int capture(void *data)
 {
 	struct drm_i915_error_state *error = data;
 
-	/* Ensure that what we readback from memory matches what the GPU sees */
-	wbinvd();
-
 	i915_capture_gen_state(error->i915, error);
 	i915_capture_reg_state(error->i915, error);
 	i915_gem_record_fences(error->i915, error);
@@ -1383,9 +1380,6 @@  static int capture(void *data)
 	error->overlay = intel_overlay_capture_error_state(error->i915);
 	error->display = intel_display_capture_error_state(error->i915);
 
-	/* And make sure we don't leave trash in the CPU cache */
-	wbinvd();
-
 	return 0;
 }
 
@@ -1459,7 +1453,6 @@  void i915_error_state_get(struct drm_device *dev,
 	if (error_priv->error)
 		kref_get(&error_priv->error->ref);
 	spin_unlock_irq(&dev_priv->gpu_error.lock);
-
 }
 
 void i915_error_state_put(struct i915_error_state_file_priv *error_priv)