diff mbox series

[i-g-t,2/3] i915/gem_cpu_reloc: Use a self-modifying chained batch

Message ID 20190116093509.30195-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [i-g-t,1/3] i915/gem_userptr_blits: Only mlock the memfd once, not the arena | expand

Commit Message

Chris Wilson Jan. 16, 2019, 9:35 a.m. UTC
Use another sensitive CPU reloc to emit a chained batch from inside the
updated buffer to reduce the workload on slow machines to fit within the
CI timeout.

References: https://bugs.freedesktop.org/show_bug.cgi?id=108248
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_cpu_reloc.c | 347 ++++++++++++++++++++-----------------
 1 file changed, 189 insertions(+), 158 deletions(-)

Comments

Mika Kuoppala Jan. 16, 2019, 2:22 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Use another sensitive CPU reloc to emit a chained batch from inside the
> updated buffer to reduce the workload on slow machines to fit within the
> CI timeout.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=108248
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  tests/i915/gem_cpu_reloc.c | 347 ++++++++++++++++++++-----------------
>  1 file changed, 189 insertions(+), 158 deletions(-)
>
> diff --git a/tests/i915/gem_cpu_reloc.c b/tests/i915/gem_cpu_reloc.c
> index 882c312d4..33c4e4e0f 100644
> --- a/tests/i915/gem_cpu_reloc.c
> +++ b/tests/i915/gem_cpu_reloc.c
> @@ -59,214 +59,245 @@
>  
>  #include "intel_bufmgr.h"
>  
> -IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
> +#define MI_INSTR(opcode, flags) ((opcode) << 23 | (flags))
>  
> -static uint32_t use_blt;
> +IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
>  
> -static void copy(int fd, uint32_t batch, uint32_t src, uint32_t dst)
> +static uint32_t *
> +gen2_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
>  {
> -	struct drm_i915_gem_execbuffer2 execbuf;
> -	struct drm_i915_gem_relocation_entry gem_reloc[2];
> -	struct drm_i915_gem_exec_object2 gem_exec[3];
> -
> -	gem_reloc[0].offset = 4 * sizeof(uint32_t);
> -	gem_reloc[0].delta = 0;
> -	gem_reloc[0].target_handle = dst;
> -	gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> -	gem_reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> -	gem_reloc[0].presumed_offset = -1;
> -
> -	gem_reloc[1].offset = 7 * sizeof(uint32_t);
> -	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> -		gem_reloc[1].offset += sizeof(uint32_t);
> -	gem_reloc[1].delta = 0;
> -	gem_reloc[1].target_handle = src;
> -	gem_reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> -	gem_reloc[1].write_domain = 0;
> -	gem_reloc[1].presumed_offset = -1;
> -
> -	memset(gem_exec, 0, sizeof(gem_exec));
> -	gem_exec[0].handle = src;
> -	gem_exec[1].handle = dst;
> -	gem_exec[2].handle = batch;
> -	gem_exec[2].relocation_count = 2;
> -	gem_exec[2].relocs_ptr = to_user_pointer(gem_reloc);
> -
> -	memset(&execbuf, 0, sizeof(execbuf));
> -	execbuf.buffers_ptr = to_user_pointer(gem_exec);
> -	execbuf.buffer_count = 3;
> -	execbuf.batch_len = 4096;
> -	execbuf.flags = use_blt;
> -
> -	gem_execbuf(fd, &execbuf);
> +	*cs++ = MI_STORE_DWORD_IMM - 1;
> +	addr->offset += sizeof(*cs);
> +	cs += 1; /* addr */
> +	cs += 1; /* value: implicit 0xffffffff */
> +	return cs;
> +}
> +static uint32_t *
> +gen4_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> +{
> +	*cs++ = MI_STORE_DWORD_IMM;
> +	*cs++ = 0;
> +	addr->offset += 2 * sizeof(*cs);
> +	cs += 1; /* addr */
> +	cs += 1; /* value: implicit 0xffffffff */
> +	return cs;
> +}
> +static uint32_t *
> +gen8_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> +{
> +	*cs++ = (MI_STORE_DWORD_IMM | 1 << 21) + 1;
> +	addr->offset += sizeof(*cs);
> +	igt_assert((addr->delta & 7) == 0);
> +	cs += 2; /* addr */
> +	cs += 2; /* value: implicit 0xffffffffffffffff */
> +	return cs;
>  }
>  
> -static void exec(int fd, uint32_t handle)
> +static uint32_t *
> +gen2_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>  {
> -	struct drm_i915_gem_execbuffer2 execbuf;
> -	struct drm_i915_gem_exec_object2 gem_exec;
> +	*cs++ = MI_BATCH_BUFFER_START | 2 << 6;
> +	reloc->offset += sizeof(*cs);
> +	reloc->delta += 1;
> +	cs += 1; /* addr */
> +	return cs;
> +}
> +static uint32_t *
> +gen4_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> +	*cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8;
> +	reloc->offset += sizeof(*cs);
> +	cs += 1; /* addr */
> +	return cs;
> +}
> +static uint32_t *
> +gen6_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> +	*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
> +	reloc->offset += sizeof(*cs);
> +	cs += 1; /* addr */
> +	return cs;
> +}
> +static uint32_t *
> +hsw_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> +	*cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8 | 1 << 13;
> +	reloc->offset += sizeof(*cs);
> +	cs += 1; /* addr */
> +	return cs;
> +}
> +static uint32_t *
> +gen8_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> +{
> +	if (((uintptr_t)cs & 7) == 0) {
> +		*cs++ = MI_NOOP; /* align addr for MI_STORE_DWORD_IMM */
> +		reloc->offset += sizeof(*cs);
> +	}

Is the intent to align things so that, once the bb start has been emitted, we
are at the right alignment? Otherwise it looks like it should have a '!' in it.
-Mika

>  
> -	memset(&gem_exec, 0, sizeof(gem_exec));
> -	gem_exec.handle = handle;
> +	*cs++ = MI_BATCH_BUFFER_START + 1;
> +	reloc->offset += sizeof(*cs);
> +	cs += 2; /* addr */
>  
> -	memset(&execbuf, 0, sizeof(execbuf));
> -	execbuf.buffers_ptr = to_user_pointer(&gem_exec);
> -	execbuf.buffer_count = 1;
> -	execbuf.batch_len = 4096;
> +	return cs;
> +}
>  
> -	gem_execbuf(fd, &execbuf);
> +static void *
> +create_tmpl(int i915, struct drm_i915_gem_relocation_entry *reloc)
> +{
> +	const uint32_t devid = intel_get_drm_devid(i915);
> +	const int gen = intel_gen(devid);
> +	uint32_t *(*emit_store_addr)(uint32_t *cs,
> +				   struct drm_i915_gem_relocation_entry *addr);
> +	uint32_t *(*emit_bb_start)(uint32_t *cs,
> +				   struct drm_i915_gem_relocation_entry *reloc);
> +	void *tmpl;
> +
> +	/* could use BLT_FILL instead for gen2 */
> +	igt_require(gem_can_store_dword(i915, 0));
> +
> +	if (gen >= 8)
> +		emit_store_addr = gen8_emit_store_addr;
> +	else if (gen >= 4)
> +		emit_store_addr = gen4_emit_store_addr;
> +	else
> +		emit_store_addr = gen2_emit_store_addr;
> +
> +	if (gen >= 8)
> +		emit_bb_start = gen8_emit_bb_start;
> +	else if (IS_HASWELL(devid))
> +		emit_bb_start = hsw_emit_bb_start;
> +	else if (gen >= 6)
> +		emit_bb_start = gen6_emit_bb_start;
> +	else if (gen >= 4)
> +		emit_bb_start = gen4_emit_bb_start;
> +	else
> +		emit_bb_start = gen2_emit_bb_start;
> +
> +	tmpl = malloc(4096);
> +	igt_assert(tmpl);
> +	memset(tmpl, 0xff, 4096);
> +
> +	/* Jump over the booby traps to the end */
> +	reloc[0].delta = 64;
> +	emit_bb_start(tmpl, &reloc[0]);
> +
> +	/* Restore the bad address to catch missing relocs */
> +	reloc[1].offset = 64;
> +	reloc[1].delta = reloc[0].offset;
> +	*emit_store_addr(tmpl + 64, &reloc[1]) = MI_BATCH_BUFFER_END;
> +
> +	return tmpl;
>  }
>  
> -uint32_t gen6_batch[] = {
> -	(XY_SRC_COPY_BLT_CMD | 6 |
> -	 XY_SRC_COPY_BLT_WRITE_ALPHA |
> -	 XY_SRC_COPY_BLT_WRITE_RGB),
> -	(3 << 24 | /* 32 bits */
> -	 0xcc << 16 | /* copy ROP */
> -	 4096),
> -	0 << 16 | 0, /* dst x1, y1 */
> -	1 << 16 | 2,
> -	0, /* dst relocation */
> -	0 << 16 | 0, /* src x1, y1 */
> -	4096,
> -	0, /* src relocation */
> -	MI_BATCH_BUFFER_END,
> -};
> -
> -uint32_t gen8_batch[] = {
> -	(XY_SRC_COPY_BLT_CMD | 8 |
> -	 XY_SRC_COPY_BLT_WRITE_ALPHA |
> -	 XY_SRC_COPY_BLT_WRITE_RGB),
> -	(3 << 24 | /* 32 bits */
> -	 0xcc << 16 | /* copy ROP */
> -	 4096),
> -	0 << 16 | 0, /* dst x1, y1 */
> -	1 << 16 | 2,
> -	0, /* dst relocation */
> -	0, /* FIXME */
> -	0 << 16 | 0, /* src x1, y1 */
> -	4096,
> -	0, /* src relocation */
> -	0, /* FIXME */
> -	MI_BATCH_BUFFER_END,
> -};
> -
> -uint32_t *batch = gen6_batch;
> -uint32_t batch_size = sizeof(gen6_batch);
> -
> -static void run_test(int fd, int count)
> +static void run_test(int i915, int count)
>  {
> -	const uint32_t hang[] = {-1, -1, -1, -1};
> -	const uint32_t end[] = {MI_BATCH_BUFFER_END, 0};
> -	uint32_t noop;
> -	uint32_t *handles;
> -	int i;
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_exec_object2 obj;
>  
> -	noop = intel_get_drm_devid(fd);
> +	uint32_t *handles;
> +	uint32_t *tmpl;
>  
> -	use_blt = 0;
> -	if (intel_gen(noop) >= 6)
> -		use_blt = I915_EXEC_BLT;
> +	handles = malloc(count * sizeof(uint32_t));
> +	igt_assert(handles);
>  
> -	if (intel_gen(noop) >= 8) {
> -		batch = gen8_batch;
> -		batch_size += 2 * 4;
> +	memset(reloc, 0, sizeof(reloc));
> +	tmpl = create_tmpl(i915, reloc);
> +	for (int i = 0; i < count; i++) {
> +		handles[i] = gem_create(i915, 4096);
> +		gem_write(i915, handles[i], 0, tmpl, 4096);
>  	}
> +	free(tmpl);
>  
> -	handles = malloc (count * sizeof(uint32_t));
> -	igt_assert(handles);
> +	memset(&obj, 0, sizeof(obj));
> +	obj.relocs_ptr = to_user_pointer(reloc);
> +	obj.relocation_count = ARRAY_SIZE(reloc);
>  
> -	noop = gem_create(fd, 4096);
> -	gem_write(fd, noop, 0, end, sizeof(end));
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(&obj);
> +	execbuf.buffer_count = 1;
>  
>  	/* fill the entire gart with batches and run them */
> -	for (i = 0; i < count; i++) {
> -		uint32_t bad;
> -
> -		handles[i] = gem_create(fd, 4096);
> -		gem_write(fd, handles[i], 0, batch, batch_size);
> -
> -		bad = gem_create(fd, 4096);
> -		gem_write(fd, bad, 0, hang, sizeof(hang));
> -		gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
> +	for (int i = 0; i < count; i++) {
> +		obj.handle = handles[i];
>  
> -		/* launch the newly created batch */
> -		copy(fd, handles[i], noop, bad);
> -		exec(fd, bad);
> -		gem_close(fd, bad);
> +		reloc[0].target_handle = obj.handle;
> +		reloc[0].presumed_offset = -1;
> +		reloc[1].target_handle = obj.handle;
> +		reloc[1].presumed_offset = -1;
>  
> -		igt_progress("gem_cpu_reloc: ", i, 2*count);
> +		gem_execbuf(i915, &execbuf);
>  	}
>  
>  	/* And again in reverse to try and catch the relocation code out */
> -	for (i = 0; i < count; i++) {
> -		uint32_t bad;
> +	for (int i = 0; i < count; i++) {
> +		obj.handle = handles[count - i - 1];
>  
> -		bad = gem_create(fd, 4096);
> -		gem_write(fd, bad, 0, hang, sizeof(hang));
> -		gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
> +		reloc[0].target_handle = obj.handle;
> +		reloc[0].presumed_offset = -1;
> +		reloc[1].target_handle = obj.handle;
> +		reloc[1].presumed_offset = -1;
>  
> -		/* launch the newly created batch */
> -		copy(fd, handles[count-i-1], noop, bad);
> -		exec(fd, bad);
> -		gem_close(fd, bad);
> -
> -		igt_progress("gem_cpu_reloc: ", count+i, 3*count);
> +		gem_execbuf(i915, &execbuf);
>  	}
>  
> -	/* Third time lucky? */
> -	for (i = 0; i < count; i++) {
> -		uint32_t bad;
> +	/* Third time unlucky? */
> +	for (int i = 0; i < count; i++) {
> +		obj.handle = handles[i];
>  
> -		bad = gem_create(fd, 4096);
> -		gem_write(fd, bad, 0, hang, sizeof(hang));
> -		gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
> +		reloc[0].target_handle = obj.handle;
> +		reloc[0].presumed_offset = -1;
> +		reloc[1].target_handle = obj.handle;
> +		reloc[1].presumed_offset = -1;
>  
> -		/* launch the newly created batch */
> -		gem_set_domain(fd, handles[i],
> -			       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
> -		copy(fd, handles[i], noop, bad);
> -		exec(fd, bad);
> -		gem_close(fd, bad);
> +		gem_set_domain(i915, obj.handle,
> +			       I915_GEM_DOMAIN_CPU,
> +			       I915_GEM_DOMAIN_CPU);
>  
> -		igt_progress("gem_cpu_reloc: ", 2*count+i, 3*count);
> +		gem_execbuf(i915, &execbuf);
>  	}
>  
> -	igt_info("Subtest suceeded, cleanup up - this might take a while.\n");
> -	for (i = 0; i < count; i++) {
> -		gem_close(fd, handles[i]);
> -	}
> -	gem_close(fd, noop);
> +	for (int i = 0; i < count; i++)
> +		gem_close(i915, handles[i]);
>  	free(handles);
>  }
>  
>  igt_main
>  {
> -	uint64_t aper_size;
> -	int fd, count;
> +	int i915;
>  
>  	igt_fixture {
> -		fd = drm_open_driver(DRIVER_INTEL);
> -		igt_require_gem(fd);
> -	}
> +		i915 = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(i915);
>  
> -	igt_subtest("basic") {
> -		run_test (fd, 10);
> +		igt_fork_hang_detector(i915);
>  	}
>  
> +	igt_subtest("basic")
> +		run_test(i915, 1);
>  
>  	igt_subtest("full") {
> -		aper_size = gem_mappable_aperture_size();
> -		count = aper_size / 4096 * 2;
> +		uint64_t aper_size = gem_mappable_aperture_size();
> +		unsigned long count = aper_size / 4096 + 1;
> +
> +		intel_require_memory(count, 4096, CHECK_RAM);
> +
> +		run_test(i915, count);
> +	}
> +
> +	igt_subtest("forked") {
> +		uint64_t aper_size = gem_mappable_aperture_size();
> +		unsigned long count = aper_size / 4096 + 1;
> +		int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>  
> -		/* count + 2 (noop & bad) buffers. A gem object appears to
> -                   require about 2kb + buffer + kernel overhead */
> -		intel_require_memory(2+count, 2048+4096, CHECK_RAM);
> +		intel_require_memory(count, 4096, CHECK_RAM);
>  
> -		run_test (fd, count);
> +		igt_fork(child, ncpus)
> +			run_test(i915, count / ncpus + 1);
> +		igt_waitchildren();
>  	}
>  
>  	igt_fixture {
> -		close(fd);
> +		igt_stop_hang_detector();
>  	}
>  }
> -- 
> 2.20.1
Chris Wilson Jan. 16, 2019, 2:30 p.m. UTC | #2
Quoting Mika Kuoppala (2019-01-16 14:22:59)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Use another sensitive CPU reloc to emit a chained batch from inside the
> > updated buffer to reduce the workload on slow machines to fit within the
> > CI timeout.
> >
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=108248
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  tests/i915/gem_cpu_reloc.c | 347 ++++++++++++++++++++-----------------
> >  1 file changed, 189 insertions(+), 158 deletions(-)
> >
> > diff --git a/tests/i915/gem_cpu_reloc.c b/tests/i915/gem_cpu_reloc.c
> > index 882c312d4..33c4e4e0f 100644
> > --- a/tests/i915/gem_cpu_reloc.c
> > +++ b/tests/i915/gem_cpu_reloc.c
> > @@ -59,214 +59,245 @@
> >  
> >  #include "intel_bufmgr.h"
> >  
> > -IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
> > +#define MI_INSTR(opcode, flags) ((opcode) << 23 | (flags))
> >  
> > -static uint32_t use_blt;
> > +IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
> >  
> > -static void copy(int fd, uint32_t batch, uint32_t src, uint32_t dst)
> > +static uint32_t *
> > +gen2_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> >  {
> > -     struct drm_i915_gem_execbuffer2 execbuf;
> > -     struct drm_i915_gem_relocation_entry gem_reloc[2];
> > -     struct drm_i915_gem_exec_object2 gem_exec[3];
> > -
> > -     gem_reloc[0].offset = 4 * sizeof(uint32_t);
> > -     gem_reloc[0].delta = 0;
> > -     gem_reloc[0].target_handle = dst;
> > -     gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> > -     gem_reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> > -     gem_reloc[0].presumed_offset = -1;
> > -
> > -     gem_reloc[1].offset = 7 * sizeof(uint32_t);
> > -     if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> > -             gem_reloc[1].offset += sizeof(uint32_t);
> > -     gem_reloc[1].delta = 0;
> > -     gem_reloc[1].target_handle = src;
> > -     gem_reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> > -     gem_reloc[1].write_domain = 0;
> > -     gem_reloc[1].presumed_offset = -1;
> > -
> > -     memset(gem_exec, 0, sizeof(gem_exec));
> > -     gem_exec[0].handle = src;
> > -     gem_exec[1].handle = dst;
> > -     gem_exec[2].handle = batch;
> > -     gem_exec[2].relocation_count = 2;
> > -     gem_exec[2].relocs_ptr = to_user_pointer(gem_reloc);
> > -
> > -     memset(&execbuf, 0, sizeof(execbuf));
> > -     execbuf.buffers_ptr = to_user_pointer(gem_exec);
> > -     execbuf.buffer_count = 3;
> > -     execbuf.batch_len = 4096;
> > -     execbuf.flags = use_blt;
> > -
> > -     gem_execbuf(fd, &execbuf);
> > +     *cs++ = MI_STORE_DWORD_IMM - 1;
> > +     addr->offset += sizeof(*cs);
> > +     cs += 1; /* addr */
> > +     cs += 1; /* value: implicit 0xffffffff */
> > +     return cs;
> > +}
> > +static uint32_t *
> > +gen4_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> > +{
> > +     *cs++ = MI_STORE_DWORD_IMM;
> > +     *cs++ = 0;
> > +     addr->offset += 2 * sizeof(*cs);
> > +     cs += 1; /* addr */
> > +     cs += 1; /* value: implicit 0xffffffff */
> > +     return cs;
> > +}
> > +static uint32_t *
> > +gen8_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
> > +{
> > +     *cs++ = (MI_STORE_DWORD_IMM | 1 << 21) + 1;
> > +     addr->offset += sizeof(*cs);
> > +     igt_assert((addr->delta & 7) == 0);
> > +     cs += 2; /* addr */
> > +     cs += 2; /* value: implicit 0xffffffffffffffff */
> > +     return cs;
> >  }
> >  
> > -static void exec(int fd, uint32_t handle)
> > +static uint32_t *
> > +gen2_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> >  {
> > -     struct drm_i915_gem_execbuffer2 execbuf;
> > -     struct drm_i915_gem_exec_object2 gem_exec;
> > +     *cs++ = MI_BATCH_BUFFER_START | 2 << 6;
> > +     reloc->offset += sizeof(*cs);
> > +     reloc->delta += 1;
> > +     cs += 1; /* addr */
> > +     return cs;
> > +}
> > +static uint32_t *
> > +gen4_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> > +{
> > +     *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8;
> > +     reloc->offset += sizeof(*cs);
> > +     cs += 1; /* addr */
> > +     return cs;
> > +}
> > +static uint32_t *
> > +gen6_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> > +{
> > +     *cs++ = MI_BATCH_BUFFER_START | 1 << 8;
> > +     reloc->offset += sizeof(*cs);
> > +     cs += 1; /* addr */
> > +     return cs;
> > +}
> > +static uint32_t *
> > +hsw_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> > +{
> > +     *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8 | 1 << 13;
> > +     reloc->offset += sizeof(*cs);
> > +     cs += 1; /* addr */
> > +     return cs;
> > +}
> > +static uint32_t *
> > +gen8_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
> > +{
> > +     if (((uintptr_t)cs & 7) == 0) {
> > +             *cs++ = MI_NOOP; /* align addr for MI_STORE_DWORD_IMM */
> > +             reloc->offset += sizeof(*cs);
> > +     }
> 
> Align it so that after the bb start emitted we are in right alignment?
> Otherwise it looks it should have '!' in it.

It's so that the batch buffer address is aligned to a qword (since we use a
qword MI_STORE_DATA_IMM, which requires destination alignment).

Imagine s/reloc/addr/; that might be a better clue.
-chris
Mika Kuoppala Jan. 16, 2019, 2:49 p.m. UTC | #3
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-01-16 14:22:59)
>> Chris Wilson <chris@chris-wilson.co.uk> writes:
>> 
>> > Use another sensitive CPU reloc to emit a chained batch from inside the
>> > updated buffer to reduce the workload on slow machines to fit within the
>> > CI timeout.
>> >
>> > References: https://bugs.freedesktop.org/show_bug.cgi?id=108248
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > ---
>> >  tests/i915/gem_cpu_reloc.c | 347 ++++++++++++++++++++-----------------
>> >  1 file changed, 189 insertions(+), 158 deletions(-)
>> >
>> > diff --git a/tests/i915/gem_cpu_reloc.c b/tests/i915/gem_cpu_reloc.c
>> > index 882c312d4..33c4e4e0f 100644
>> > --- a/tests/i915/gem_cpu_reloc.c
>> > +++ b/tests/i915/gem_cpu_reloc.c
>> > @@ -59,214 +59,245 @@
>> >  
>> >  #include "intel_bufmgr.h"
>> >  
>> > -IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
>> > +#define MI_INSTR(opcode, flags) ((opcode) << 23 | (flags))
>> >  
>> > -static uint32_t use_blt;
>> > +IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
>> >  
>> > -static void copy(int fd, uint32_t batch, uint32_t src, uint32_t dst)
>> > +static uint32_t *
>> > +gen2_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
>> >  {
>> > -     struct drm_i915_gem_execbuffer2 execbuf;
>> > -     struct drm_i915_gem_relocation_entry gem_reloc[2];
>> > -     struct drm_i915_gem_exec_object2 gem_exec[3];
>> > -
>> > -     gem_reloc[0].offset = 4 * sizeof(uint32_t);
>> > -     gem_reloc[0].delta = 0;
>> > -     gem_reloc[0].target_handle = dst;
>> > -     gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
>> > -     gem_reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
>> > -     gem_reloc[0].presumed_offset = -1;
>> > -
>> > -     gem_reloc[1].offset = 7 * sizeof(uint32_t);
>> > -     if (intel_gen(intel_get_drm_devid(fd)) >= 8)
>> > -             gem_reloc[1].offset += sizeof(uint32_t);
>> > -     gem_reloc[1].delta = 0;
>> > -     gem_reloc[1].target_handle = src;
>> > -     gem_reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
>> > -     gem_reloc[1].write_domain = 0;
>> > -     gem_reloc[1].presumed_offset = -1;
>> > -
>> > -     memset(gem_exec, 0, sizeof(gem_exec));
>> > -     gem_exec[0].handle = src;
>> > -     gem_exec[1].handle = dst;
>> > -     gem_exec[2].handle = batch;
>> > -     gem_exec[2].relocation_count = 2;
>> > -     gem_exec[2].relocs_ptr = to_user_pointer(gem_reloc);
>> > -
>> > -     memset(&execbuf, 0, sizeof(execbuf));
>> > -     execbuf.buffers_ptr = to_user_pointer(gem_exec);
>> > -     execbuf.buffer_count = 3;
>> > -     execbuf.batch_len = 4096;
>> > -     execbuf.flags = use_blt;
>> > -
>> > -     gem_execbuf(fd, &execbuf);
>> > +     *cs++ = MI_STORE_DWORD_IMM - 1;
>> > +     addr->offset += sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     cs += 1; /* value: implicit 0xffffffff */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen4_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
>> > +{
>> > +     *cs++ = MI_STORE_DWORD_IMM;
>> > +     *cs++ = 0;
>> > +     addr->offset += 2 * sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     cs += 1; /* value: implicit 0xffffffff */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen8_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
>> > +{
>> > +     *cs++ = (MI_STORE_DWORD_IMM | 1 << 21) + 1;
>> > +     addr->offset += sizeof(*cs);
>> > +     igt_assert((addr->delta & 7) == 0);
>> > +     cs += 2; /* addr */
>> > +     cs += 2; /* value: implicit 0xffffffffffffffff */
>> > +     return cs;
>> >  }
>> >  
>> > -static void exec(int fd, uint32_t handle)
>> > +static uint32_t *
>> > +gen2_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> >  {
>> > -     struct drm_i915_gem_execbuffer2 execbuf;
>> > -     struct drm_i915_gem_exec_object2 gem_exec;
>> > +     *cs++ = MI_BATCH_BUFFER_START | 2 << 6;
>> > +     reloc->offset += sizeof(*cs);
>> > +     reloc->delta += 1;
>> > +     cs += 1; /* addr */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen4_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> > +{
>> > +     *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8;
>> > +     reloc->offset += sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen6_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> > +{
>> > +     *cs++ = MI_BATCH_BUFFER_START | 1 << 8;
>> > +     reloc->offset += sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +hsw_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> > +{
>> > +     *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8 | 1 << 13;
>> > +     reloc->offset += sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen8_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> > +{
>> > +     if (((uintptr_t)cs & 7) == 0) {
>> > +             *cs++ = MI_NOOP; /* align addr for MI_STORE_DWORD_IMM */
>> > +             reloc->offset += sizeof(*cs);
>> > +     }
>> 
>> Align it so that after the bb start emitted we are in right alignment?
>> Otherwise it looks it should have '!' in it.
>
> It's so the batch buffer address is aligned to the qword (since we use a
> qword MI_STORE_DATA_IMM which requires destination alignment)
>
> Imagine s/reloc/addr/ might be a better clue.

Ok. There are some undefined bits on the bb starts. But sometimes magic is
warranted, and here the distinction between trickery and magic is blurred.
Alerting and warming up the reader would help to avoid sprains.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> -chris
diff mbox series

Patch

diff --git a/tests/i915/gem_cpu_reloc.c b/tests/i915/gem_cpu_reloc.c
index 882c312d4..33c4e4e0f 100644
--- a/tests/i915/gem_cpu_reloc.c
+++ b/tests/i915/gem_cpu_reloc.c
@@ -59,214 +59,245 @@ 
 
 #include "intel_bufmgr.h"
 
-IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
+#define MI_INSTR(opcode, flags) ((opcode) << 23 | (flags))
 
-static uint32_t use_blt;
+IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
 
-static void copy(int fd, uint32_t batch, uint32_t src, uint32_t dst)
+static uint32_t *
+gen2_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
 {
-	struct drm_i915_gem_execbuffer2 execbuf;
-	struct drm_i915_gem_relocation_entry gem_reloc[2];
-	struct drm_i915_gem_exec_object2 gem_exec[3];
-
-	gem_reloc[0].offset = 4 * sizeof(uint32_t);
-	gem_reloc[0].delta = 0;
-	gem_reloc[0].target_handle = dst;
-	gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
-	gem_reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
-	gem_reloc[0].presumed_offset = -1;
-
-	gem_reloc[1].offset = 7 * sizeof(uint32_t);
-	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
-		gem_reloc[1].offset += sizeof(uint32_t);
-	gem_reloc[1].delta = 0;
-	gem_reloc[1].target_handle = src;
-	gem_reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
-	gem_reloc[1].write_domain = 0;
-	gem_reloc[1].presumed_offset = -1;
-
-	memset(gem_exec, 0, sizeof(gem_exec));
-	gem_exec[0].handle = src;
-	gem_exec[1].handle = dst;
-	gem_exec[2].handle = batch;
-	gem_exec[2].relocation_count = 2;
-	gem_exec[2].relocs_ptr = to_user_pointer(gem_reloc);
-
-	memset(&execbuf, 0, sizeof(execbuf));
-	execbuf.buffers_ptr = to_user_pointer(gem_exec);
-	execbuf.buffer_count = 3;
-	execbuf.batch_len = 4096;
-	execbuf.flags = use_blt;
-
-	gem_execbuf(fd, &execbuf);
+	*cs++ = MI_STORE_DWORD_IMM - 1;
+	addr->offset += sizeof(*cs);
+	cs += 1; /* addr */
+	cs += 1; /* value: implicit 0xffffffff */
+	return cs;
+}
+static uint32_t *
+gen4_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
+{
+	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = 0;
+	addr->offset += 2 * sizeof(*cs);
+	cs += 1; /* addr */
+	cs += 1; /* value: implicit 0xffffffff */
+	return cs;
+}
+static uint32_t *
+gen8_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
+{
+	*cs++ = (MI_STORE_DWORD_IMM | 1 << 21) + 1;
+	addr->offset += sizeof(*cs);
+	igt_assert((addr->delta & 7) == 0);
+	cs += 2; /* addr */
+	cs += 2; /* value: implicit 0xffffffffffffffff */
+	return cs;
 }
 
-static void exec(int fd, uint32_t handle)
+static uint32_t *
+gen2_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
 {
-	struct drm_i915_gem_execbuffer2 execbuf;
-	struct drm_i915_gem_exec_object2 gem_exec;
+	*cs++ = MI_BATCH_BUFFER_START | 2 << 6;
+	reloc->offset += sizeof(*cs);
+	reloc->delta += 1;
+	cs += 1; /* addr */
+	return cs;
+}
+static uint32_t *
+gen4_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
+{
+	*cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8;
+	reloc->offset += sizeof(*cs);
+	cs += 1; /* addr */
+	return cs;
+}
+static uint32_t *
+gen6_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
+{
+	*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
+	reloc->offset += sizeof(*cs);
+	cs += 1; /* addr */
+	return cs;
+}
+static uint32_t *
+hsw_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
+{
+	*cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8 | 1 << 13;
+	reloc->offset += sizeof(*cs);
+	cs += 1; /* addr */
+	return cs;
+}
+static uint32_t *
+gen8_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
+{
+	if (((uintptr_t)cs & 7) == 0) {
+		*cs++ = MI_NOOP; /* align addr for MI_STORE_DWORD_IMM */
+		reloc->offset += sizeof(*cs);
+	}
 
-	memset(&gem_exec, 0, sizeof(gem_exec));
-	gem_exec.handle = handle;
+	*cs++ = MI_BATCH_BUFFER_START + 1;
+	reloc->offset += sizeof(*cs);
+	cs += 2; /* addr */
 
-	memset(&execbuf, 0, sizeof(execbuf));
-	execbuf.buffers_ptr = to_user_pointer(&gem_exec);
-	execbuf.buffer_count = 1;
-	execbuf.batch_len = 4096;
+	return cs;
+}
 
-	gem_execbuf(fd, &execbuf);
+static void *
+create_tmpl(int i915, struct drm_i915_gem_relocation_entry *reloc)
+{
+	const uint32_t devid = intel_get_drm_devid(i915);
+	const int gen = intel_gen(devid);
+	uint32_t *(*emit_store_addr)(uint32_t *cs,
+				   struct drm_i915_gem_relocation_entry *addr);
+	uint32_t *(*emit_bb_start)(uint32_t *cs,
+				   struct drm_i915_gem_relocation_entry *reloc);
+	void *tmpl;
+
+	/* could use BLT_FILL instead for gen2 */
+	igt_require(gem_can_store_dword(i915, 0));
+
+	if (gen >= 8)
+		emit_store_addr = gen8_emit_store_addr;
+	else if (gen >= 4)
+		emit_store_addr = gen4_emit_store_addr;
+	else
+		emit_store_addr = gen2_emit_store_addr;
+
+	if (gen >= 8)
+		emit_bb_start = gen8_emit_bb_start;
+	else if (IS_HASWELL(devid))
+		emit_bb_start = hsw_emit_bb_start;
+	else if (gen >= 6)
+		emit_bb_start = gen6_emit_bb_start;
+	else if (gen >= 4)
+		emit_bb_start = gen4_emit_bb_start;
+	else
+		emit_bb_start = gen2_emit_bb_start;
+
+	tmpl = malloc(4096);
+	igt_assert(tmpl);
+	memset(tmpl, 0xff, 4096);
+
+	/* Jump over the booby traps to the end */
+	reloc[0].delta = 64;
+	emit_bb_start(tmpl, &reloc[0]);
+
+	/* Restore the bad address to catch missing relocs */
+	reloc[1].offset = 64;
+	reloc[1].delta = reloc[0].offset;
+	*emit_store_addr(tmpl + 64, &reloc[1]) = MI_BATCH_BUFFER_END;
+
+	return tmpl;
 }
 
-uint32_t gen6_batch[] = {
-	(XY_SRC_COPY_BLT_CMD | 6 |
-	 XY_SRC_COPY_BLT_WRITE_ALPHA |
-	 XY_SRC_COPY_BLT_WRITE_RGB),
-	(3 << 24 | /* 32 bits */
-	 0xcc << 16 | /* copy ROP */
-	 4096),
-	0 << 16 | 0, /* dst x1, y1 */
-	1 << 16 | 2,
-	0, /* dst relocation */
-	0 << 16 | 0, /* src x1, y1 */
-	4096,
-	0, /* src relocation */
-	MI_BATCH_BUFFER_END,
-};
-
-uint32_t gen8_batch[] = {
-	(XY_SRC_COPY_BLT_CMD | 8 |
-	 XY_SRC_COPY_BLT_WRITE_ALPHA |
-	 XY_SRC_COPY_BLT_WRITE_RGB),
-	(3 << 24 | /* 32 bits */
-	 0xcc << 16 | /* copy ROP */
-	 4096),
-	0 << 16 | 0, /* dst x1, y1 */
-	1 << 16 | 2,
-	0, /* dst relocation */
-	0, /* FIXME */
-	0 << 16 | 0, /* src x1, y1 */
-	4096,
-	0, /* src relocation */
-	0, /* FIXME */
-	MI_BATCH_BUFFER_END,
-};
-
-uint32_t *batch = gen6_batch;
-uint32_t batch_size = sizeof(gen6_batch);
-
-static void run_test(int fd, int count)
+static void run_test(int i915, int count) /* create @count 4KiB batches from the template and execute each one in three passes */
 {
-	const uint32_t hang[] = {-1, -1, -1, -1};
-	const uint32_t end[] = {MI_BATCH_BUFFER_END, 0};
-	uint32_t noop;
-	uint32_t *handles;
-	int i;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_exec_object2 obj;
 
-	noop = intel_get_drm_devid(fd);
+	uint32_t *handles;
+	uint32_t *tmpl;
 
-	use_blt = 0;
-	if (intel_gen(noop) >= 6)
-		use_blt = I915_EXEC_BLT;
+	handles = malloc(count * sizeof(uint32_t)); /* one GEM handle per batch; freed at the end of this function */
+	igt_assert(handles);
 
-	if (intel_gen(noop) >= 8) {
-		batch = gen8_batch;
-		batch_size += 2 * 4;
+	memset(reloc, 0, sizeof(reloc));
+	tmpl = create_tmpl(i915, reloc); /* create_tmpl() also fills in fields of reloc[]; the target handles are set per exec below */
+	for (int i = 0; i < count; i++) {
+		handles[i] = gem_create(i915, 4096);
+		gem_write(i915, handles[i], 0, tmpl, 4096); /* every batch starts as an identical copy of the template */
 	}
+	free(tmpl);
 
-	handles = malloc (count * sizeof(uint32_t));
-	igt_assert(handles);
+	memset(&obj, 0, sizeof(obj));
+	obj.relocs_ptr = to_user_pointer(reloc); /* the same two relocation entries are reused for every object */
+	obj.relocation_count = ARRAY_SIZE(reloc);
 
-	noop = gem_create(fd, 4096);
-	gem_write(fd, noop, 0, end, sizeof(end));
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(&obj);
+	execbuf.buffer_count = 1; /* obj is the only object in each execbuf */
 
 	/* fill the entire gart with batches and run them */
-	for (i = 0; i < count; i++) {
-		uint32_t bad;
-
-		handles[i] = gem_create(fd, 4096);
-		gem_write(fd, handles[i], 0, batch, batch_size);
-
-		bad = gem_create(fd, 4096);
-		gem_write(fd, bad, 0, hang, sizeof(hang));
-		gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
+	for (int i = 0; i < count; i++) {
+		obj.handle = handles[i];
 
-		/* launch the newly created batch */
-		copy(fd, handles[i], noop, bad);
-		exec(fd, bad);
-		gem_close(fd, bad);
+		reloc[0].target_handle = obj.handle; /* both relocs target the object being executed */
+		reloc[0].presumed_offset = -1; /* presumably never matches the real address, so the reloc is always processed - TODO confirm */
+		reloc[1].target_handle = obj.handle;
+		reloc[1].presumed_offset = -1;
 
-		igt_progress("gem_cpu_reloc: ", i, 2*count);
+		gem_execbuf(i915, &execbuf);
 	}
 
 	/* And again in reverse to try and catch the relocation code out */
-	for (i = 0; i < count; i++) {
-		uint32_t bad;
+	for (int i = 0; i < count; i++) {
+		obj.handle = handles[count - i - 1]; /* walk the handles from last to first */
 
-		bad = gem_create(fd, 4096);
-		gem_write(fd, bad, 0, hang, sizeof(hang));
-		gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
+		reloc[0].target_handle = obj.handle;
+		reloc[0].presumed_offset = -1;
+		reloc[1].target_handle = obj.handle;
+		reloc[1].presumed_offset = -1;
 
-		/* launch the newly created batch */
-		copy(fd, handles[count-i-1], noop, bad);
-		exec(fd, bad);
-		gem_close(fd, bad);
-
-		igt_progress("gem_cpu_reloc: ", count+i, 3*count);
+		gem_execbuf(i915, &execbuf);
 	}
 
-	/* Third time lucky? */
-	for (i = 0; i < count; i++) {
-		uint32_t bad;
+	/* Third time unlucky? Forward order again, with a CPU set-domain call before each exec */
+	for (int i = 0; i < count; i++) {
+		obj.handle = handles[i];
 
-		bad = gem_create(fd, 4096);
-		gem_write(fd, bad, 0, hang, sizeof(hang));
-		gem_write(fd, bad, 4096-sizeof(end), end, sizeof(end));
+		reloc[0].target_handle = obj.handle;
+		reloc[0].presumed_offset = -1;
+		reloc[1].target_handle = obj.handle;
+		reloc[1].presumed_offset = -1;
 
-		/* launch the newly created batch */
-		gem_set_domain(fd, handles[i],
-			       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
-		copy(fd, handles[i], noop, bad);
-		exec(fd, bad);
-		gem_close(fd, bad);
+		gem_set_domain(i915, obj.handle,
+			       I915_GEM_DOMAIN_CPU,
+			       I915_GEM_DOMAIN_CPU);
 
-		igt_progress("gem_cpu_reloc: ", 2*count+i, 3*count);
+		gem_execbuf(i915, &execbuf);
 	}
 
-	igt_info("Subtest suceeded, cleanup up - this might take a while.\n");
-	for (i = 0; i < count; i++) {
-		gem_close(fd, handles[i]);
-	}
-	gem_close(fd, noop);
+	for (int i = 0; i < count; i++) /* release every batch object before freeing the handle array */
+		gem_close(i915, handles[i]);
 	free(handles);
 }
 
 igt_main
 {
-	uint64_t aper_size;
-	int fd, count;
+	int i915; /* DRM fd opened in the first fixture and passed to every subtest */
 
 	igt_fixture {
-		fd = drm_open_driver(DRIVER_INTEL);
-		igt_require_gem(fd);
-	}
+		i915 = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(i915);
 
-	igt_subtest("basic") {
-		run_test (fd, 10);
+		igt_fork_hang_detector(i915); /* paired with igt_stop_hang_detector() in the closing fixture */
 	}
 
+	igt_subtest("basic")
+		run_test(i915, 1); /* smallest case: a single batch */
 
 	igt_subtest("full") {
-		aper_size = gem_mappable_aperture_size();
-		count = aper_size / 4096 * 2;
+		uint64_t aper_size = gem_mappable_aperture_size();
+		unsigned long count = aper_size / 4096 + 1; /* one more 4KiB batch than there are 4KiB pages in aper_size */
+
+		intel_require_memory(count, 4096, CHECK_RAM); /* presumably skips unless count objects of 4KiB fit in RAM - TODO confirm */
+
+		run_test(i915, count); /* NOTE(review): run_test() takes an int, so the unsigned long count is narrowed here */
+	}
+
+	igt_subtest("forked") {
+		uint64_t aper_size = gem_mappable_aperture_size();
+		unsigned long count = aper_size / 4096 + 1;
+		int ncpus = sysconf(_SC_NPROCESSORS_ONLN); /* one child per online CPU */
 
-		/* count + 2 (noop & bad) buffers. A gem object appears to
-                   require about 2kb + buffer + kernel overhead */
-		intel_require_memory(2+count, 2048+4096, CHECK_RAM);
+		intel_require_memory(count, 4096, CHECK_RAM);
 
-		run_test (fd, count);
+		igt_fork(child, ncpus)
+			run_test(i915, count / ncpus + 1); /* each child takes count / ncpus + 1 batches, so the children together cover at least count */
+		igt_waitchildren();
 	}
 
 	igt_fixture {
-		close(fd);
+		igt_stop_hang_detector(); /* NOTE(review): the old close(fd) is removed and i915 is not closed in this fixture */
 	}
 }