[v3] drm/i915/gt: Poison GTT scratch pages
diff mbox series

Message ID 20200123111816.1292582-1-chris@chris-wilson.co.uk
State New
Headers show
Series
  • [v3] drm/i915/gt: Poison GTT scratch pages
Related show

Commit Message

Chris Wilson Jan. 23, 2020, 11:18 a.m. UTC
Using a clear page for scratch means that we have relatively benign
errors in case it is accidentally used, but that can be rather too
benign for debugging. If we poison the scratch, ideally it quickly
results in an obvious error.

v2: Set each page individually just in case we are using highmem for our
scratch page.
v3: Pick a new scratch register as MI_STORE_REGISTER_MEM does not work
with GPR0 on gen7, unbelievably.

Suggested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
---
 .../drm/i915/gem/selftests/i915_gem_context.c | 51 ++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_gtt.c           | 30 +++++++++++
 2 files changed, 75 insertions(+), 6 deletions(-)

Comments

Mika Kuoppala Jan. 23, 2020, 11:56 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Using a clear page for scratch means that we have relatively benign
> errors in case it is accidentally used, but that can be rather too
> benign for debugging. If we poison the scratch, ideally it quickly
> results in an obvious error.
>
> v2: Set each page individually just in case we are using highmem for our
> scratch page.
> v3: Pick a new scratch register as MI_STORE_REGISTER_MEM does not work
> with GPR0 on gen7, unbelievably.
>
> Suggested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

I have a faint memory...aeons ago..might have.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Matthew Auld <matthew.william.auld@gmail.com>
> ---
>  .../drm/i915/gem/selftests/i915_gem_context.c | 51 ++++++++++++++++---
>  drivers/gpu/drm/i915/gt/intel_gtt.c           | 30 +++++++++++
>  2 files changed, 75 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> index 7fc46861a54d..00a56a8b309a 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> @@ -1575,7 +1575,6 @@ static int read_from_scratch(struct i915_gem_context *ctx,
>  	struct drm_i915_private *i915 = ctx->i915;
>  	struct drm_i915_gem_object *obj;
>  	struct i915_address_space *vm;
> -	const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
>  	const u32 result = 0x100;
>  	struct i915_request *rq;
>  	struct i915_vma *vma;
> @@ -1596,20 +1595,24 @@ static int read_from_scratch(struct i915_gem_context *ctx,
>  
>  	memset(cmd, POISON_INUSE, PAGE_SIZE);
>  	if (INTEL_GEN(i915) >= 8) {
> +		const u32 GPR0 = engine->mmio_base + 0x600;
> +
>  		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
> -		*cmd++ = RCS_GPR0;
> +		*cmd++ = GPR0;
>  		*cmd++ = lower_32_bits(offset);
>  		*cmd++ = upper_32_bits(offset);
>  		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
> -		*cmd++ = RCS_GPR0;
> +		*cmd++ = GPR0;
>  		*cmd++ = result;
>  		*cmd++ = 0;
>  	} else {
> +		const u32 reg = engine->mmio_base + 0x420;

3d prim end offset? Well should not matter for this selftest
but did you check 0xA198?

How have 0x600 been been working in gen7 previously?

-Mika
> +
>  		*cmd++ = MI_LOAD_REGISTER_MEM;
> -		*cmd++ = RCS_GPR0;
> +		*cmd++ = reg;
>  		*cmd++ = offset;
>  		*cmd++ = MI_STORE_REGISTER_MEM;
> -		*cmd++ = RCS_GPR0;
> +		*cmd++ = reg;
>  		*cmd++ = result;
>  	}
>  	*cmd = MI_BATCH_BUFFER_END;
> @@ -1686,6 +1689,28 @@ static int read_from_scratch(struct i915_gem_context *ctx,
>  	return err;
>  }
>  
> +static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
> +{
> +	struct page *page = ctx_vm(ctx)->scratch[0].base.page;
> +	u32 *vaddr;
> +	int err = 0;
> +
> +	if (!page) {
> +		pr_err("No scratch page!\n");
> +		return -EINVAL;
> +	}
> +
> +	vaddr = kmap(page);
> +	memcpy(out, vaddr, sizeof(*out));
> +	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
> +		pr_err("Inconsistent initial state of scratch page!\n");
> +		err = -EINVAL;
> +	}
> +	kunmap(page);
> +
> +	return err;
> +}
> +
>  static int igt_vm_isolation(void *arg)
>  {
>  	struct drm_i915_private *i915 = arg;
> @@ -1696,6 +1721,7 @@ static int igt_vm_isolation(void *arg)
>  	I915_RND_STATE(prng);
>  	struct file *file;
>  	u64 vm_total;
> +	u32 expected;
>  	int err;
>  
>  	if (INTEL_GEN(i915) < 7)
> @@ -1720,12 +1746,21 @@ static int igt_vm_isolation(void *arg)
>  		goto out_file;
>  	}
>  
> +	/* Read the initial state of the scratch page */
> +	err = check_scratch_page(ctx_a, &expected);
> +	if (err)
> +		goto out_file;
> +
>  	ctx_b = live_context(i915, file);
>  	if (IS_ERR(ctx_b)) {
>  		err = PTR_ERR(ctx_b);
>  		goto out_file;
>  	}
>  
> +	err = check_scratch_page(ctx_b, &expected);
> +	if (err)
> +		goto out_file;
> +
>  	/* We can only test vm isolation, if the vm are distinct */
>  	if (ctx_vm(ctx_a) == ctx_vm(ctx_b))
>  		goto out_file;
> @@ -1743,6 +1778,10 @@ static int igt_vm_isolation(void *arg)
>  		if (!intel_engine_can_store_dword(engine))
>  			continue;
>  
> +		/* Not all engines have their own GPR! */
> +		if (INTEL_GEN(i915) < 8 && engine->class != RENDER_CLASS)
> +			continue;
> +
>  		while (!__igt_timeout(end_time, NULL)) {
>  			u32 value = 0xc5c5c5c5;
>  			u64 offset;
> @@ -1760,7 +1799,7 @@ static int igt_vm_isolation(void *arg)
>  			if (err)
>  				goto out_file;
>  
> -			if (value) {
> +			if (value != expected) {
>  				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
>  				       engine->name, value,
>  				       upper_32_bits(offset),
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
> index 45d8e0019a8e..bb9a6e638175 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> @@ -299,6 +299,25 @@ fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
>  	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
>  }
>  
> +static void poison_scratch_page(struct page *page, unsigned long size)
> +{
> +	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
> +		return;
> +
> +	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
> +
> +	do {
> +		void *vaddr;
> +
> +		vaddr = kmap(page);
> +		memset(vaddr, POISON_FREE, PAGE_SIZE);
> +		kunmap(page);
> +
> +		page = pfn_to_page(page_to_pfn(page) + 1);
> +		size -= PAGE_SIZE;
> +	} while (size);
> +}
> +
>  int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>  {
>  	unsigned long size;
> @@ -331,6 +350,17 @@ int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>  		if (unlikely(!page))
>  			goto skip;
>  
> +		/*
> +		 * Use a non-zero scratch page for debugging.
> +		 *
> +		 * We want a value that should be reasonably obvious
> +		 * to spot in the error state, while also causing a GPU hang
> +		 * if executed. We prefer using a clear page in production, so
> +		 * should it ever be accidentally used, the effect should be
> +		 * fairly benign.
> +		 */
> +		poison_scratch_page(page, size);
> +
>  		addr = dma_map_page_attrs(vm->dma,
>  					  page, 0, size,
>  					  PCI_DMA_BIDIRECTIONAL,
> -- 
> 2.25.0
Chris Wilson Jan. 23, 2020, 12:12 p.m. UTC | #2
Quoting Mika Kuoppala (2020-01-23 11:56:20)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Using a clear page for scratch means that we have relatively benign
> > errors in case it is accidentally used, but that can be rather too
> > benign for debugging. If we poison the scratch, ideally it quickly
> > results in an obvious error.
> >
> > v2: Set each page individually just in case we are using highmem for our
> > scratch page.
> > v3: Pick a new scratch register as MI_STORE_REGISTER_MEM does not work
> > with GPR0 on gen7, unbelievably.
> >
> > Suggested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> 
> I have a faint memory...aeons ago..might have.
> 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Matthew Auld <matthew.william.auld@gmail.com>
> > ---
> >  .../drm/i915/gem/selftests/i915_gem_context.c | 51 ++++++++++++++++---
> >  drivers/gpu/drm/i915/gt/intel_gtt.c           | 30 +++++++++++
> >  2 files changed, 75 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> > index 7fc46861a54d..00a56a8b309a 100644
> > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
> > @@ -1575,7 +1575,6 @@ static int read_from_scratch(struct i915_gem_context *ctx,
> >       struct drm_i915_private *i915 = ctx->i915;
> >       struct drm_i915_gem_object *obj;
> >       struct i915_address_space *vm;
> > -     const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
> >       const u32 result = 0x100;
> >       struct i915_request *rq;
> >       struct i915_vma *vma;
> > @@ -1596,20 +1595,24 @@ static int read_from_scratch(struct i915_gem_context *ctx,
> >  
> >       memset(cmd, POISON_INUSE, PAGE_SIZE);
> >       if (INTEL_GEN(i915) >= 8) {
> > +             const u32 GPR0 = engine->mmio_base + 0x600;
> > +
> >               *cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
> > -             *cmd++ = RCS_GPR0;
> > +             *cmd++ = GPR0;
> >               *cmd++ = lower_32_bits(offset);
> >               *cmd++ = upper_32_bits(offset);
> >               *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
> > -             *cmd++ = RCS_GPR0;
> > +             *cmd++ = GPR0;
> >               *cmd++ = result;
> >               *cmd++ = 0;
> >       } else {
> > +             const u32 reg = engine->mmio_base + 0x420;
> 
> 3d prim end offset? Well should not matter for this selftest
> but did you check 0xA198?

Forcewake is handled by MI from the engines, if I understood you
correctly. First thought it was indeed just that that !rcs engines
couldn't read from the rcs, so limited it to just rcs and still it
failed.
> 
> How have 0x600 been been working in gen7 previously?

No idea. Tried to use I915_DISPATCH_SECURE (fixing up the batch vma to
be in the GGTT) and it still returned 0. Even after poisoning the GPR
with MI_LOAD_REG_IMM. Ergo it had to be the read of GPR that was being
scrubbed (since we poison the destination to verify the write lands).

But 3DPRIM_END_OFFSET worked (I suspected it might since it's part of
the indirect glDrawArrays), so I assume it's just some more of the
wonderfully askew validation on gen7.
-Chris

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 7fc46861a54d..00a56a8b309a 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1575,7 +1575,6 @@  static int read_from_scratch(struct i915_gem_context *ctx,
 	struct drm_i915_private *i915 = ctx->i915;
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
-	const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
 	const u32 result = 0x100;
 	struct i915_request *rq;
 	struct i915_vma *vma;
@@ -1596,20 +1595,24 @@  static int read_from_scratch(struct i915_gem_context *ctx,
 
 	memset(cmd, POISON_INUSE, PAGE_SIZE);
 	if (INTEL_GEN(i915) >= 8) {
+		const u32 GPR0 = engine->mmio_base + 0x600;
+
 		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
-		*cmd++ = RCS_GPR0;
+		*cmd++ = GPR0;
 		*cmd++ = lower_32_bits(offset);
 		*cmd++ = upper_32_bits(offset);
 		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
-		*cmd++ = RCS_GPR0;
+		*cmd++ = GPR0;
 		*cmd++ = result;
 		*cmd++ = 0;
 	} else {
+		const u32 reg = engine->mmio_base + 0x420;
+
 		*cmd++ = MI_LOAD_REGISTER_MEM;
-		*cmd++ = RCS_GPR0;
+		*cmd++ = reg;
 		*cmd++ = offset;
 		*cmd++ = MI_STORE_REGISTER_MEM;
-		*cmd++ = RCS_GPR0;
+		*cmd++ = reg;
 		*cmd++ = result;
 	}
 	*cmd = MI_BATCH_BUFFER_END;
@@ -1686,6 +1689,28 @@  static int read_from_scratch(struct i915_gem_context *ctx,
 	return err;
 }
 
+static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
+{
+	struct page *page = ctx_vm(ctx)->scratch[0].base.page;
+	u32 *vaddr;
+	int err = 0;
+
+	if (!page) {
+		pr_err("No scratch page!\n");
+		return -EINVAL;
+	}
+
+	vaddr = kmap(page);
+	memcpy(out, vaddr, sizeof(*out));
+	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
+		pr_err("Inconsistent initial state of scratch page!\n");
+		err = -EINVAL;
+	}
+	kunmap(page);
+
+	return err;
+}
+
 static int igt_vm_isolation(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -1696,6 +1721,7 @@  static int igt_vm_isolation(void *arg)
 	I915_RND_STATE(prng);
 	struct file *file;
 	u64 vm_total;
+	u32 expected;
 	int err;
 
 	if (INTEL_GEN(i915) < 7)
@@ -1720,12 +1746,21 @@  static int igt_vm_isolation(void *arg)
 		goto out_file;
 	}
 
+	/* Read the initial state of the scratch page */
+	err = check_scratch_page(ctx_a, &expected);
+	if (err)
+		goto out_file;
+
 	ctx_b = live_context(i915, file);
 	if (IS_ERR(ctx_b)) {
 		err = PTR_ERR(ctx_b);
 		goto out_file;
 	}
 
+	err = check_scratch_page(ctx_b, &expected);
+	if (err)
+		goto out_file;
+
 	/* We can only test vm isolation, if the vm are distinct */
 	if (ctx_vm(ctx_a) == ctx_vm(ctx_b))
 		goto out_file;
@@ -1743,6 +1778,10 @@  static int igt_vm_isolation(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
+		/* Not all engines have their own GPR! */
+		if (INTEL_GEN(i915) < 8 && engine->class != RENDER_CLASS)
+			continue;
+
 		while (!__igt_timeout(end_time, NULL)) {
 			u32 value = 0xc5c5c5c5;
 			u64 offset;
@@ -1760,7 +1799,7 @@  static int igt_vm_isolation(void *arg)
 			if (err)
 				goto out_file;
 
-			if (value) {
+			if (value != expected) {
 				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
 				       engine->name, value,
 				       upper_32_bits(offset),
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 45d8e0019a8e..bb9a6e638175 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -299,6 +299,25 @@  fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
 	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
 }
 
+static void poison_scratch_page(struct page *page, unsigned long size)
+{
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return;
+
+	GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+	do {
+		void *vaddr;
+
+		vaddr = kmap(page);
+		memset(vaddr, POISON_FREE, PAGE_SIZE);
+		kunmap(page);
+
+		page = pfn_to_page(page_to_pfn(page) + 1);
+		size -= PAGE_SIZE;
+	} while (size);
+}
+
 int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 {
 	unsigned long size;
@@ -331,6 +350,17 @@  int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 		if (unlikely(!page))
 			goto skip;
 
+		/*
+		 * Use a non-zero scratch page for debugging.
+		 *
+		 * We want a value that should be reasonably obvious
+		 * to spot in the error state, while also causing a GPU hang
+		 * if executed. We prefer using a clear page in production, so
+		 * should it ever be accidentally used, the effect should be
+		 * fairly benign.
+		 */
+		poison_scratch_page(page, size);
+
 		addr = dma_map_page_attrs(vm->dma,
 					  page, 0, size,
 					  PCI_DMA_BIDIRECTIONAL,