[10/22] drm/i915/selftests: add write-dword test for LMEM

Message ID 20190927173409.31175-11-matthew.auld@intel.com (mailing list archive)
State New, archived
Series LMEM basics

Commit Message

Matthew Auld Sept. 27, 2019, 5:33 p.m. UTC
Simple test that writes dwords across an object, using various engines in
a randomized order, checking from the CPU that our writes land.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
---
 .../drm/i915/selftests/intel_memory_region.c  | 179 ++++++++++++++++++
 1 file changed, 179 insertions(+)

Comments

Chris Wilson Sept. 27, 2019, 7:27 p.m. UTC | #1
Quoting Matthew Auld (2019-09-27 18:33:57)
> +static int igt_gpu_write_dw(struct intel_context *ce,
> +                           struct i915_vma *vma,
> +                           u32 dword,
> +                           u32 value)
> +{
> +       int err;
> +
> +       i915_gem_object_lock(vma->obj);
> +       err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
> +       i915_gem_object_unlock(vma->obj);
> +       if (err)
> +               return err;

Your cpu check doesn't leave the caches dirty so this is overkill, and
worse may hide a coherency problem?

> +       return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
> +                              vma->size >> PAGE_SHIFT, value);
> +}
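
As a minimal sketch (not the submitted code), the variant Chris appears to
be suggesting simply drops the GTT-domain flush:

static int igt_gpu_write_dw(struct intel_context *ce,
			    struct i915_vma *vma,
			    u32 dword,
			    u32 value)
{
	/*
	 * No set_to_gtt_domain() dance: the CPU check reads through an
	 * uncached WC mapping and leaves no dirty cachelines behind, so
	 * the flush is redundant, and skipping it avoids papering over
	 * a real coherency bug.
	 */
	return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
			       vma->size >> PAGE_SHIFT, value);
}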
Chris Wilson Sept. 27, 2019, 8:42 p.m. UTC | #2
Quoting Matthew Auld (2019-09-27 18:33:57)
> +       i = 0;
> +       engines = i915_gem_context_lock_engines(ctx);
> +       do {
> +               u32 rng = prandom_u32_state(&prng);
> +               u32 dword = offset_in_page(rng) / 4;
> +
> +               ce = engines->engines[order[i] % engines->num_engines];
> +               i = (i + 1) % (count * count);
> +               if (!ce || !intel_engine_can_store_dword(ce->engine))
> +                       continue;
> +
> +               err = igt_gpu_write_dw(ce, vma, dword, rng);
> +               if (err)
> +                       break;

Do you have a test that does
	dword,
	64B or cacheline,
	page
	random width&strides of the above
before doing the read back of a random dword from those?

Think nasty cache artifacts, PCI transfers, and timing.
-Chris
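
For illustration, one loose reading of that request, as a sketch only:
igt_gpu_write_strided() and the width table are hypothetical, built on the
igt_gpu_fill_dw() helper the patch already uses (with count=1 it emits a
single dword store), and a real version would want to bound its runtime:

static const unsigned int widths[] = {
	sizeof(u32),	/* dword */
	64,		/* cacheline */
	PAGE_SIZE,	/* page */
};

static int igt_gpu_write_strided(struct intel_context *ce,
				 struct i915_vma *vma,
				 struct rnd_state *prng)
{
	int i, n, err;

	for (i = 0; i < ARRAY_SIZE(widths); i++) {
		/* a handful of randomly placed, width-aligned stores;
		 * the object here is < 4G, so a u32 offset is enough */
		for (n = 0; n < 8; n++) {
			u32 offset =
				round_down(prandom_u32_state(prng) %
					   (u32)vma->size, widths[i]);

			err = igt_gpu_fill_dw(ce, vma, offset, 1,
					      lower_32_bits(offset));
			if (err)
				return err;
		}
	}

	return 0;
}

The readback would then pick a random dword from the touched range, as in
igt_cpu_check().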
Matthew Auld Sept. 30, 2019, 9:58 a.m. UTC | #3
On 27/09/2019 21:42, Chris Wilson wrote:
> Quoting Matthew Auld (2019-09-27 18:33:57)
>> +       i = 0;
>> +       engines = i915_gem_context_lock_engines(ctx);
>> +       do {
>> +               u32 rng = prandom_u32_state(&prng);
>> +               u32 dword = offset_in_page(rng) / 4;
>> +
>> +               ce = engines->engines[order[i] % engines->num_engines];
>> +               i = (i + 1) % (count * count);
>> +               if (!ce || !intel_engine_can_store_dword(ce->engine))
>> +                       continue;
>> +
>> +               err = igt_gpu_write_dw(ce, vma, dword, rng);
>> +               if (err)
>> +                       break;
> 
> Do you have a test that does
> 	dword,
> 	64B or cacheline,
> 	page
> 	random width&strides of the above
> before doing the read back of a random dword from those?

Are you thinking write_dw + increment(dword, qword, cl, ..), or actually 
doing the fill: write_dw, write_qw, write_block?

Or maybe both? I have been playing around with the write_dw + increment 
for hugepages.c.

> 
> Think nasty cache artifacts, PCI transfers, and timing.
> -Chris
>
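
A hypothetical sketch of the "write_dw + increment" idea
(igt_gpu_write_dw_inc() is a made-up name): instead of hitting the same
dword in every page, the target advances by a caller-chosen step per page:

static int igt_gpu_write_dw_inc(struct intel_context *ce,
				struct i915_vma *vma,
				unsigned int step, /* bytes: 4, 8, 64, ... */
				u32 value)
{
	u64 npages = vma->size >> PAGE_SHIFT;
	u32 offset = 0;
	u64 n;
	int err;

	for (n = 0; n < npages; n++) {
		/* one store into page n; count=1 makes
		 * igt_gpu_fill_dw() emit a single dword write */
		err = igt_gpu_fill_dw(ce, vma, n * PAGE_SIZE + offset,
				      1, value);
		if (err)
			return err;

		/* wrap within the page, staying dword-aligned
		 * (assumes step is a multiple of 4) */
		offset = (offset + step) & (PAGE_SIZE - sizeof(u32));
	}

	return 0;
}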
Chris Wilson Sept. 30, 2019, 10:46 a.m. UTC | #4
Quoting Matthew Auld (2019-09-30 10:58:15)
> On 27/09/2019 21:42, Chris Wilson wrote:
> > Quoting Matthew Auld (2019-09-27 18:33:57)
> >> +       i = 0;
> >> +       engines = i915_gem_context_lock_engines(ctx);
> >> +       do {
> >> +               u32 rng = prandom_u32_state(&prng);
> >> +               u32 dword = offset_in_page(rng) / 4;
> >> +
> >> +               ce = engines->engines[order[i] % engines->num_engines];
> >> +               i = (i + 1) % (count * count);
> >> +               if (!ce || !intel_engine_can_store_dword(ce->engine))
> >> +                       continue;
> >> +
> >> +               err = igt_gpu_write_dw(ce, vma, dword, rng);
> >> +               if (err)
> >> +                       break;
> > 
> > Do you have a test that does
> >       dword,
> >       64B or cacheline,
> >       page
> >       random width&strides of the above
> > before doing the read back of a random dword from those?
> 
> Are you thinking write_dw + increment(dword, qword, cl, ..), or actually 
> doing the fill: write_dw, write_qw, write_block?

Here, I think stride is most interesting to hit various caching/transfer
artifacts between the CPU and lmem (and possibly with writes to lmem).

I think write_dw et al better stress the GPU write side and the
instruction stream.
 
> Or maybe both? I have been playing around with the write_dw + increment 
> for hugepages.c.

Maybe both :) Never say no to more patterns! (Just be cautious of time
budget and use the cycles wisely to maximise coverage of your mental
model of the HW.) Once we get past the obvious coherency glitches in the
driver, it gets far more subtle. It's easy enough to filter out the noise
but deducing a pattern from gaps in the testing is much harder :)
-Chris
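
A hypothetical CPU-side counterpart (igt_cpu_check_strided() is a made-up
name), walking the WC mapping at a caller-chosen stride rather than
sampling one dword per page; it assumes the whole object was filled with
val, that stride is a multiple of 4, and it elides the domain management
done in igt_cpu_check():

static int igt_cpu_check_strided(struct drm_i915_gem_object *obj,
				 unsigned int stride, u32 val)
{
	unsigned long n;
	int err;

	err = i915_gem_object_pin_pages(obj);
	if (err)
		return err;

	for (n = 0; n < obj->base.size >> PAGE_SHIFT && !err; ++n) {
		u32 __iomem *base;
		unsigned int offset;

		base = i915_gem_object_lmem_io_map_page_atomic(obj, n);

		for (offset = 0; offset < PAGE_SIZE; offset += stride) {
			if (ioread32(base + offset / sizeof(u32)) != val) {
				pr_err("n=%lu offset=%u: mismatch\n",
				       n, offset);
				err = -EINVAL;
				break;
			}
		}

		io_mapping_unmap_atomic(base);
	}

	i915_gem_object_unpin_pages(obj);
	return err;
}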

Patch

diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index ba98e8254b80..8d7d8b9e00da 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -7,13 +7,16 @@ 
 
 #include "../i915_selftest.h"
 
+
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 #include "mock_region.h"
 
+#include "gem/i915_gem_context.h"
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_object_blt.h"
+#include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
 #include "gt/intel_gt.h"
 #include "selftests/igt_flush_test.h"
@@ -255,6 +258,133 @@  static int igt_mock_continuous(void *arg)
 	return err;
 }
 
+static int igt_gpu_write_dw(struct intel_context *ce,
+			    struct i915_vma *vma,
+			    u32 dword,
+			    u32 value)
+{
+	int err;
+
+	i915_gem_object_lock(vma->obj);
+	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
+	i915_gem_object_unlock(vma->obj);
+	if (err)
+		return err;
+
+	return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
+			       vma->size >> PAGE_SHIFT, value);
+}
+
+static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+	unsigned long n;
+	int err;
+
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_wc_domain(obj, false);
+	i915_gem_object_unlock(obj);
+	if (err)
+		return err;
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		return err;
+
+	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
+		u32 __iomem *base;
+		u32 read_val;
+
+		base = i915_gem_object_lmem_io_map_page_atomic(obj, n);
+
+		read_val = ioread32(base + dword);
+		io_mapping_unmap_atomic(base);
+		if (read_val != val) {
+			pr_err("n=%lu base[%u]=%u, val=%u\n",
+			       n, dword, read_val, val);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return err;
+}
+
+static int igt_gpu_write(struct i915_gem_context *ctx,
+			 struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_gem_engines *engines;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	unsigned int count;
+	struct i915_vma *vma;
+	int *order;
+	int i, n;
+	int err = 0;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	n = 0;
+	count = 0;
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		count++;
+		if (!intel_engine_can_store_dword(ce->engine))
+			continue;
+
+		n++;
+	}
+	i915_gem_context_unlock_engines(ctx);
+	if (!n)
+		return 0;
+
+	order = i915_random_order(count * count, &prng);
+	if (!order)
+		return -ENOMEM;
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_free;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto out_free;
+
+	i = 0;
+	engines = i915_gem_context_lock_engines(ctx);
+	do {
+		u32 rng = prandom_u32_state(&prng);
+		u32 dword = offset_in_page(rng) / 4;
+
+		ce = engines->engines[order[i] % engines->num_engines];
+		i = (i + 1) % (count * count);
+		if (!ce || !intel_engine_can_store_dword(ce->engine))
+			continue;
+
+		err = igt_gpu_write_dw(ce, vma, dword, rng);
+		if (err)
+			break;
+
+		err = igt_cpu_check(obj, dword, rng);
+		if (err)
+			break;
+	} while (!__igt_timeout(end_time, NULL));
+	i915_gem_context_unlock_engines(ctx);
+
+out_free:
+	kfree(order);
+
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
 static int igt_lmem_create(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -276,6 +406,54 @@  static int igt_lmem_create(void *arg)
 	return err;
 }
 
+static int igt_lmem_write_gpu(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	struct i915_gem_context *ctx;
+	struct drm_file *file;
+	I915_RND_STATE(prng);
+	u32 sz;
+	int err;
+
+	mutex_unlock(&i915->drm.struct_mutex);
+	file = mock_file(i915);
+	mutex_lock(&i915->drm.struct_mutex);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ctx = live_context(i915, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_file;
+	}
+
+	sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
+
+	obj = i915_gem_object_create_lmem(i915, sz, 0);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto out_file;
+	}
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		goto out_put;
+
+	err = igt_gpu_write(ctx, obj);
+	if (err)
+		pr_err("igt_gpu_write failed(%d)\n", err);
+
+	i915_gem_object_unpin_pages(obj);
+out_put:
+	i915_gem_object_put(obj);
+out_file:
+	mutex_unlock(&i915->drm.struct_mutex);
+	mock_file_free(i915, file);
+	mutex_lock(&i915->drm.struct_mutex);
+	return err;
+}
+
 static int igt_lmem_write_cpu(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -389,6 +567,7 @@  int intel_memory_region_live_selftests(struct drm_i915_private *i915)
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_lmem_create),
 		SUBTEST(igt_lmem_write_cpu),
+		SUBTEST(igt_lmem_write_gpu),
 	};
 	int err;