Message ID | 20180628213532.22008-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
On 28/06/2018 22:35, Chris Wilson wrote:
> The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT
> pressure (by trying to execute more objects than may fit into the GTT).
> We spread the same set of handles across different processes, with the
> result that each would occasionally stall waiting for execution of an
> unrelated batch, limiting the pressure we were applying. If we using a
> steaming write via a WC pointer, we can avoid the serialisation penalty
> and so submit faster.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++---------------
>  1 file changed, 42 insertions(+), 24 deletions(-)
>
> diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c
> index 4097e4077..efd612bb6 100644
> --- a/tests/gem_exec_gttfill.c
> +++ b/tests/gem_exec_gttfill.c
> @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
>
>  #define BATCH_SIZE (4096<<10)
>
> -static void xchg_u32(void *array, unsigned i, unsigned j)
> +struct batch {
> +        uint32_t handle;
> +        void *ptr;
> +};
> +
> +static void xchg_batch(void *array, unsigned int i, unsigned int j)
>  {
> -        uint32_t *u32 = array;
> -        uint32_t tmp = u32[i];
> -        u32[i] = u32[j];
> -        u32[j] = tmp;
> +        struct batch *batches = array;
> +        struct batch tmp;
> +
> +        tmp = batches[i];
> +        batches[i] = batches[j];
> +        batches[j] = tmp;
>  }
>
>  static void submit(int fd, int gen,
>                     struct drm_i915_gem_execbuffer2 *eb,
>                     struct drm_i915_gem_relocation_entry *reloc,
> -                   uint32_t *handles, unsigned count)
> +                   struct batch *batches, unsigned int count)
>  {
>          struct drm_i915_gem_exec_object2 obj;
>          uint32_t batch[16];
> @@ -80,7 +87,7 @@ static void submit(int fd, int gen,
>
>          eb->buffers_ptr = to_user_pointer(&obj);
>          for (unsigned i = 0; i < count; i++) {
> -                obj.handle = handles[i];
> +                obj.handle = batches[i].handle;
>                  reloc[0].target_handle = obj.handle;
>                  reloc[1].target_handle = obj.handle;
>
> @@ -88,8 +95,8 @@ static void submit(int fd, int gen,
>                  reloc[0].presumed_offset = obj.offset;
>                  reloc[1].presumed_offset = obj.offset;
>
> -                gem_write(fd, obj.handle, eb->batch_start_offset,
> -                          batch, sizeof(batch));
> +                memcpy(batches[i].ptr + eb->batch_start_offset,
> +                       batch, sizeof(batch));
>
>                  gem_execbuf(fd, eb);
>          }
> @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>          struct drm_i915_gem_execbuffer2 execbuf;
>          struct drm_i915_gem_relocation_entry reloc[2];
>          volatile uint64_t *shared;
> -        unsigned *handles;
> +        struct batch *batches;
>          unsigned engines[16];
>          unsigned nengine;
>          unsigned engine;
> @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>          if (gen < 6)
>                  execbuf.flags |= I915_EXEC_SECURE;
>
> -        handles = calloc(count, sizeof(handles));
> -        igt_assert(handles);
> -        for (unsigned i = 0; i < count; i++)
> -                handles[i] = gem_create(fd, BATCH_SIZE);
> +        batches = calloc(count, sizeof(*batches));
> +        igt_assert(batches);
> +        for (unsigned i = 0; i < count; i++) {
> +                batches[i].handle = gem_create(fd, BATCH_SIZE);
> +                batches[i].ptr =
> +                        __gem_mmap__wc(fd, batches[i].handle,
> +                                       0, BATCH_SIZE, PROT_WRITE);
> +                if (!batches[i].ptr) {
> +                        batches[i].ptr =
> +                                __gem_mmap__gtt(fd, batches[i].handle,
> +                                                BATCH_SIZE, PROT_WRITE);
> +                }
> +                igt_require(batches[i].ptr);

Not assert?

> +        }
>
>          /* Flush all memory before we start the timer */
> -        submit(fd, gen, &execbuf, reloc, handles, count);
> +        submit(fd, gen, &execbuf, reloc, batches, count);
>
>          igt_fork(child, nengine) {
>                  uint64_t cycles = 0;
>                  hars_petruska_f54_1_random_perturb(child);
> -                igt_permute_array(handles, count, xchg_u32);
> +                igt_permute_array(batches, count, xchg_batch);
>                  execbuf.batch_start_offset = child*64;
>                  execbuf.flags |= engines[child];
>                  igt_until_timeout(timeout) {
> -                        submit(fd, gen, &execbuf, reloc, handles, count);
> +                        submit(fd, gen, &execbuf, reloc, batches, count);
>                          for (unsigned i = 0; i < count; i++) {
> -                                uint32_t handle = handles[i];
> -                                uint64_t buf[2];
> +                                uint64_t offset, delta;
>
> -                                gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0]));
> -                                gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1]));
> -                                igt_assert_eq_u64(buf[0], buf[1]);

No flushing or domain management needed, especially since it can be
either wc or gtt mmap?

> +                                offset = *(uint64_t *)(batches[i].ptr + reloc[1].offset);
> +                                delta = *(uint64_t *)(batches[i].ptr + reloc[0].delta);
> +                                igt_assert_eq_u64(offset, delta);
>                          }
>                          cycles++;
>                  }
> @@ -176,8 +192,10 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>          }
>          igt_waitchildren();
>
> -        for (unsigned i = 0; i < count; i++)
> -                gem_close(fd, handles[i]);
> +        for (unsigned i = 0; i < count; i++) {
> +                munmap(batches[i].ptr, BATCH_SIZE);
> +                gem_close(fd, batches[i].handle);
> +        }
>
>          shared[nengine] = 0;
>          for (unsigned i = 0; i < nengine; i++)
>

Regards,

Tvrtko
Quoting Tvrtko Ursulin (2018-06-29 16:15:04)
>
> On 28/06/2018 22:35, Chris Wilson wrote:
> > The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT
> > pressure (by trying to execute more objects than may fit into the GTT).
> > We spread the same set of handles across different processes, with the
> > result that each would occasionally stall waiting for execution of an
> > unrelated batch, limiting the pressure we were applying. If we using a
> > steaming write via a WC pointer, we can avoid the serialisation penalty
> > and so submit faster.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++---------------
> >  1 file changed, 42 insertions(+), 24 deletions(-)
> >
> > diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c
> > index 4097e4077..efd612bb6 100644
> > --- a/tests/gem_exec_gttfill.c
> > +++ b/tests/gem_exec_gttfill.c
> > @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
> >
> >  #define BATCH_SIZE (4096<<10)
> >
> > -static void xchg_u32(void *array, unsigned i, unsigned j)
> > +struct batch {
> > +        uint32_t handle;
> > +        void *ptr;
> > +};
> > +
> > +static void xchg_batch(void *array, unsigned int i, unsigned int j)
> >  {
> > -        uint32_t *u32 = array;
> > -        uint32_t tmp = u32[i];
> > -        u32[i] = u32[j];
> > -        u32[j] = tmp;
> > +        struct batch *batches = array;
> > +        struct batch tmp;
> > +
> > +        tmp = batches[i];
> > +        batches[i] = batches[j];
> > +        batches[j] = tmp;
> >  }
> >
> >  static void submit(int fd, int gen,
> >                     struct drm_i915_gem_execbuffer2 *eb,
> >                     struct drm_i915_gem_relocation_entry *reloc,
> > -                   uint32_t *handles, unsigned count)
> > +                   struct batch *batches, unsigned int count)
> >  {
> >          struct drm_i915_gem_exec_object2 obj;
> >          uint32_t batch[16];
> > @@ -80,7 +87,7 @@ static void submit(int fd, int gen,
> >
> >          eb->buffers_ptr = to_user_pointer(&obj);
> >          for (unsigned i = 0; i < count; i++) {
> > -                obj.handle = handles[i];
> > +                obj.handle = batches[i].handle;
> >                  reloc[0].target_handle = obj.handle;
> >                  reloc[1].target_handle = obj.handle;
> >
> > @@ -88,8 +95,8 @@ static void submit(int fd, int gen,
> >                  reloc[0].presumed_offset = obj.offset;
> >                  reloc[1].presumed_offset = obj.offset;
> >
> > -                gem_write(fd, obj.handle, eb->batch_start_offset,
> > -                          batch, sizeof(batch));
> > +                memcpy(batches[i].ptr + eb->batch_start_offset,
> > +                       batch, sizeof(batch));
> >
> >                  gem_execbuf(fd, eb);
> >          }
> > @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
> >          struct drm_i915_gem_execbuffer2 execbuf;
> >          struct drm_i915_gem_relocation_entry reloc[2];
> >          volatile uint64_t *shared;
> > -        unsigned *handles;
> > +        struct batch *batches;
> >          unsigned engines[16];
> >          unsigned nengine;
> >          unsigned engine;
> > @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout)
> >          if (gen < 6)
> >                  execbuf.flags |= I915_EXEC_SECURE;
> >
> > -        handles = calloc(count, sizeof(handles));
> > -        igt_assert(handles);
> > -        for (unsigned i = 0; i < count; i++)
> > -                handles[i] = gem_create(fd, BATCH_SIZE);
> > +        batches = calloc(count, sizeof(*batches));
> > +        igt_assert(batches);
> > +        for (unsigned i = 0; i < count; i++) {
> > +                batches[i].handle = gem_create(fd, BATCH_SIZE);
> > +                batches[i].ptr =
> > +                        __gem_mmap__wc(fd, batches[i].handle,
> > +                                       0, BATCH_SIZE, PROT_WRITE);
> > +                if (!batches[i].ptr) {
> > +                        batches[i].ptr =
> > +                                __gem_mmap__gtt(fd, batches[i].handle,
> > +                                                BATCH_SIZE, PROT_WRITE);
> > +                }
> > +                igt_require(batches[i].ptr);
>
> Not assert?

If we fallback to using gtt, we are likely to run out of mappable space,
in which case we can't run the test. We should only fallback to gtt
because we can't support WC (the likelihood of it being ENOMEM is
small). So skip since a failure is expected on old kernels.

> > +        }
> >
> >          /* Flush all memory before we start the timer */
> > -        submit(fd, gen, &execbuf, reloc, handles, count);
> > +        submit(fd, gen, &execbuf, reloc, batches, count);
> >
> >          igt_fork(child, nengine) {
> >                  uint64_t cycles = 0;
> >                  hars_petruska_f54_1_random_perturb(child);
> > -                igt_permute_array(handles, count, xchg_u32);
> > +                igt_permute_array(batches, count, xchg_batch);
> >                  execbuf.batch_start_offset = child*64;
> >                  execbuf.flags |= engines[child];
> >                  igt_until_timeout(timeout) {
> > -                        submit(fd, gen, &execbuf, reloc, handles, count);
> > +                        submit(fd, gen, &execbuf, reloc, batches, count);
> >                          for (unsigned i = 0; i < count; i++) {
> > -                                uint32_t handle = handles[i];
> > -                                uint64_t buf[2];
> >
> > -                                gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0]));
> > -                                gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1]));
> > -                                igt_assert_eq_u64(buf[0], buf[1]);
>
> No flushing or domain management needed, especially since it can be
> either wc or gtt mmap?

It's a UC read of a buffer known to already flushed from the CPU caches
with a prior gem_sync, so no not required. Considering that asynchronous
access is the whole point of the patch...
-Chris
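As an aside, the skip-versus-fail logic discussed above can be sketched on
its own. The helper below is not code from the patch and the name is made
up; it only illustrates the rationale, using the IGT calls the patch
already relies on (__gem_mmap__wc(), __gem_mmap__gtt(), igt_require()) and
assuming the usual IGT headers:

/* Illustrative only: prefer a WC mapping of the batch, fall back to a
 * GTT mapping, and skip (rather than fail) when no mapping is available.
 */
static void *map_batch_or_skip(int fd, uint32_t handle, uint64_t size)
{
        void *ptr;

        /* Preferred path: a write-combining CPU mapping of the batch. */
        ptr = __gem_mmap__wc(fd, handle, 0, size, PROT_WRITE);

        /* Missing WC mmap support is expected only on old kernels; fall
         * back to a GTT mapping there, knowing that filling the whole GTT
         * through mappable GTT space is likely to run out anyway.
         */
        if (!ptr)
                ptr = __gem_mmap__gtt(fd, handle, size, PROT_WRITE);

        /* igt_require() turns a NULL mapping into a skip, not a failure,
         * since being unable to map is an expected limitation there.
         */
        igt_require(ptr);

        return ptr;
}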
On 29/06/2018 16:22, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-29 16:15:04)
>>
>> On 28/06/2018 22:35, Chris Wilson wrote:
>>> The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT
>>> pressure (by trying to execute more objects than may fit into the GTT).
>>> We spread the same set of handles across different processes, with the
>>> result that each would occasionally stall waiting for execution of an
>>> unrelated batch, limiting the pressure we were applying. If we using a
>>> steaming write via a WC pointer, we can avoid the serialisation penalty
>>> and so submit faster.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>  tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++---------------
>>>  1 file changed, 42 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c
>>> index 4097e4077..efd612bb6 100644
>>> --- a/tests/gem_exec_gttfill.c
>>> +++ b/tests/gem_exec_gttfill.c
>>> @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
>>>
>>>  #define BATCH_SIZE (4096<<10)
>>>
>>> -static void xchg_u32(void *array, unsigned i, unsigned j)
>>> +struct batch {
>>> +        uint32_t handle;
>>> +        void *ptr;
>>> +};
>>> +
>>> +static void xchg_batch(void *array, unsigned int i, unsigned int j)
>>>  {
>>> -        uint32_t *u32 = array;
>>> -        uint32_t tmp = u32[i];
>>> -        u32[i] = u32[j];
>>> -        u32[j] = tmp;
>>> +        struct batch *batches = array;
>>> +        struct batch tmp;
>>> +
>>> +        tmp = batches[i];
>>> +        batches[i] = batches[j];
>>> +        batches[j] = tmp;
>>>  }
>>>
>>>  static void submit(int fd, int gen,
>>>                     struct drm_i915_gem_execbuffer2 *eb,
>>>                     struct drm_i915_gem_relocation_entry *reloc,
>>> -                   uint32_t *handles, unsigned count)
>>> +                   struct batch *batches, unsigned int count)
>>>  {
>>>          struct drm_i915_gem_exec_object2 obj;
>>>          uint32_t batch[16];
>>> @@ -80,7 +87,7 @@ static void submit(int fd, int gen,
>>>
>>>          eb->buffers_ptr = to_user_pointer(&obj);
>>>          for (unsigned i = 0; i < count; i++) {
>>> -                obj.handle = handles[i];
>>> +                obj.handle = batches[i].handle;
>>>                  reloc[0].target_handle = obj.handle;
>>>                  reloc[1].target_handle = obj.handle;
>>>
>>> @@ -88,8 +95,8 @@ static void submit(int fd, int gen,
>>>                  reloc[0].presumed_offset = obj.offset;
>>>                  reloc[1].presumed_offset = obj.offset;
>>>
>>> -                gem_write(fd, obj.handle, eb->batch_start_offset,
>>> -                          batch, sizeof(batch));
>>> +                memcpy(batches[i].ptr + eb->batch_start_offset,
>>> +                       batch, sizeof(batch));
>>>
>>>                  gem_execbuf(fd, eb);
>>>          }
>>> @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>>>          struct drm_i915_gem_execbuffer2 execbuf;
>>>          struct drm_i915_gem_relocation_entry reloc[2];
>>>          volatile uint64_t *shared;
>>> -        unsigned *handles;
>>> +        struct batch *batches;
>>>          unsigned engines[16];
>>>          unsigned nengine;
>>>          unsigned engine;
>>> @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>>>          if (gen < 6)
>>>                  execbuf.flags |= I915_EXEC_SECURE;
>>>
>>> -        handles = calloc(count, sizeof(handles));
>>> -        igt_assert(handles);
>>> -        for (unsigned i = 0; i < count; i++)
>>> -                handles[i] = gem_create(fd, BATCH_SIZE);
>>> +        batches = calloc(count, sizeof(*batches));
>>> +        igt_assert(batches);
>>> +        for (unsigned i = 0; i < count; i++) {
>>> +                batches[i].handle = gem_create(fd, BATCH_SIZE);
>>> +                batches[i].ptr =
>>> +                        __gem_mmap__wc(fd, batches[i].handle,
>>> +                                       0, BATCH_SIZE, PROT_WRITE);
>>> +                if (!batches[i].ptr) {
>>> +                        batches[i].ptr =
>>> +                                __gem_mmap__gtt(fd, batches[i].handle,
>>> +                                                BATCH_SIZE, PROT_WRITE);
>>> +                }
>>> +                igt_require(batches[i].ptr);
>>
>> Not assert?
>
> If we fallback to using gtt, we are likely to run out of mappable space,
> in which case we can't run the test. We should only fallback to gtt
> because we can't support WC (the likelihood of it being ENOMEM is
> small). So skip since a failure is expected on old kernels.
>
>>> +        }
>>>
>>>          /* Flush all memory before we start the timer */
>>> -        submit(fd, gen, &execbuf, reloc, handles, count);
>>> +        submit(fd, gen, &execbuf, reloc, batches, count);
>>>
>>>          igt_fork(child, nengine) {
>>>                  uint64_t cycles = 0;
>>>                  hars_petruska_f54_1_random_perturb(child);
>>> -                igt_permute_array(handles, count, xchg_u32);
>>> +                igt_permute_array(batches, count, xchg_batch);
>>>                  execbuf.batch_start_offset = child*64;
>>>                  execbuf.flags |= engines[child];
>>>                  igt_until_timeout(timeout) {
>>> -                        submit(fd, gen, &execbuf, reloc, handles, count);
>>> +                        submit(fd, gen, &execbuf, reloc, batches, count);
>>>                          for (unsigned i = 0; i < count; i++) {
>>> -                                uint32_t handle = handles[i];
>>> -                                uint64_t buf[2];
>>>
>>> -                                gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0]));
>>> -                                gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1]));
>>> -                                igt_assert_eq_u64(buf[0], buf[1]);
>>
>> No flushing or domain management needed, especially since it can be
>> either wc or gtt mmap?
>
> It's a UC read of a buffer known to already flushed from the CPU caches
> with a prior gem_sync, so no not required. Considering that asynchronous
> access is the whole point of the patch...

True.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
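The other point in the exchange, reading the results back without any
set-domain ioctl, can be illustrated the same way. Again this is only a
sketch, not code from the patch, and the helper name is hypothetical: it
assumes the object is already idle (for example after a gem_sync() on the
handle), so a plain uncached read through the WC or GTT mapping observes
what the GPU wrote:

/* Hypothetical helper: compare the two qwords the test expects to match,
 * reading straight through the (uncached) WC/GTT mapping.  No explicit
 * domain management is needed because the object is assumed idle, e.g.
 * after gem_sync(fd, handle).
 */
static void check_result(void *ptr,
                         const struct drm_i915_gem_relocation_entry *reloc)
{
        uint64_t offset = *(uint64_t *)((char *)ptr + reloc[1].offset);
        uint64_t delta = *(uint64_t *)((char *)ptr + reloc[0].delta);

        igt_assert_eq_u64(offset, delta);
}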
diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c
index 4097e4077..efd612bb6 100644
--- a/tests/gem_exec_gttfill.c
+++ b/tests/gem_exec_gttfill.c
@@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
 
 #define BATCH_SIZE (4096<<10)
 
-static void xchg_u32(void *array, unsigned i, unsigned j)
+struct batch {
+        uint32_t handle;
+        void *ptr;
+};
+
+static void xchg_batch(void *array, unsigned int i, unsigned int j)
 {
-        uint32_t *u32 = array;
-        uint32_t tmp = u32[i];
-        u32[i] = u32[j];
-        u32[j] = tmp;
+        struct batch *batches = array;
+        struct batch tmp;
+
+        tmp = batches[i];
+        batches[i] = batches[j];
+        batches[j] = tmp;
 }
 
 static void submit(int fd, int gen,
                    struct drm_i915_gem_execbuffer2 *eb,
                    struct drm_i915_gem_relocation_entry *reloc,
-                   uint32_t *handles, unsigned count)
+                   struct batch *batches, unsigned int count)
 {
         struct drm_i915_gem_exec_object2 obj;
         uint32_t batch[16];
@@ -80,7 +87,7 @@ static void submit(int fd, int gen,
 
         eb->buffers_ptr = to_user_pointer(&obj);
         for (unsigned i = 0; i < count; i++) {
-                obj.handle = handles[i];
+                obj.handle = batches[i].handle;
                 reloc[0].target_handle = obj.handle;
                 reloc[1].target_handle = obj.handle;
 
@@ -88,8 +95,8 @@ static void submit(int fd, int gen,
                 reloc[0].presumed_offset = obj.offset;
                 reloc[1].presumed_offset = obj.offset;
 
-                gem_write(fd, obj.handle, eb->batch_start_offset,
-                          batch, sizeof(batch));
+                memcpy(batches[i].ptr + eb->batch_start_offset,
+                       batch, sizeof(batch));
 
                 gem_execbuf(fd, eb);
         }
@@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
         struct drm_i915_gem_execbuffer2 execbuf;
         struct drm_i915_gem_relocation_entry reloc[2];
         volatile uint64_t *shared;
-        unsigned *handles;
+        struct batch *batches;
         unsigned engines[16];
         unsigned nengine;
         unsigned engine;
@@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout)
         if (gen < 6)
                 execbuf.flags |= I915_EXEC_SECURE;
 
-        handles = calloc(count, sizeof(handles));
-        igt_assert(handles);
-        for (unsigned i = 0; i < count; i++)
-                handles[i] = gem_create(fd, BATCH_SIZE);
+        batches = calloc(count, sizeof(*batches));
+        igt_assert(batches);
+        for (unsigned i = 0; i < count; i++) {
+                batches[i].handle = gem_create(fd, BATCH_SIZE);
+                batches[i].ptr =
+                        __gem_mmap__wc(fd, batches[i].handle,
+                                       0, BATCH_SIZE, PROT_WRITE);
+                if (!batches[i].ptr) {
+                        batches[i].ptr =
+                                __gem_mmap__gtt(fd, batches[i].handle,
+                                                BATCH_SIZE, PROT_WRITE);
+                }
+                igt_require(batches[i].ptr);
+        }
 
         /* Flush all memory before we start the timer */
-        submit(fd, gen, &execbuf, reloc, handles, count);
+        submit(fd, gen, &execbuf, reloc, batches, count);
 
         igt_fork(child, nengine) {
                 uint64_t cycles = 0;
                 hars_petruska_f54_1_random_perturb(child);
-                igt_permute_array(handles, count, xchg_u32);
+                igt_permute_array(batches, count, xchg_batch);
                 execbuf.batch_start_offset = child*64;
                 execbuf.flags |= engines[child];
                 igt_until_timeout(timeout) {
-                        submit(fd, gen, &execbuf, reloc, handles, count);
+                        submit(fd, gen, &execbuf, reloc, batches, count);
                         for (unsigned i = 0; i < count; i++) {
-                                uint32_t handle = handles[i];
-                                uint64_t buf[2];
+                                uint64_t offset, delta;
 
-                                gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0]));
-                                gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1]));
-                                igt_assert_eq_u64(buf[0], buf[1]);
+                                offset = *(uint64_t *)(batches[i].ptr + reloc[1].offset);
+                                delta = *(uint64_t *)(batches[i].ptr + reloc[0].delta);
+                                igt_assert_eq_u64(offset, delta);
                         }
                         cycles++;
                 }
@@ -176,8 +192,10 @@ static void fillgtt(int fd, unsigned ring, int timeout)
         }
         igt_waitchildren();
 
-        for (unsigned i = 0; i < count; i++)
-                gem_close(fd, handles[i]);
+        for (unsigned i = 0; i < count; i++) {
+                munmap(batches[i].ptr, BATCH_SIZE);
+                gem_close(fd, batches[i].handle);
+        }
 
         shared[nengine] = 0;
         for (unsigned i = 0; i < nengine; i++)
The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT
pressure (by trying to execute more objects than may fit into the GTT).
We spread the same set of handles across different processes, with the
result that each would occasionally stall waiting for execution of an
unrelated batch, limiting the pressure we were applying. If we use a
streaming write via a WC pointer, we can avoid the serialisation penalty
and so submit faster.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 24 deletions(-)
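For context, the heart of the change is the write path in submit(). The
helper below is not from the patch; it is a hedged sketch contrasting the
old pwrite-based upload with the new streaming write through the WC
mapping set up at test start. The function name and its use_wc switch are
invented for illustration; struct batch and gem_write() are as used by the
patch and IGT.

/* Illustrative only: upload the assembled batch either via the pwrite
 * ioctl (old path) or via a streaming memcpy into the WC mapping
 * (new path).
 */
static void write_batch(int fd, struct batch *b, uint64_t start,
                        const uint32_t *batch, size_t len, bool use_wc)
{
        if (use_wc) {
                /* Plain CPU stores through the WC pointer: no ioctl and
                 * no implicit wait, even if another process still has
                 * this handle busy on the GPU.
                 */
                memcpy((char *)b->ptr + start, batch, len);
        } else {
                /* gem_write() (pwrite) may first serialise against any
                 * outstanding GPU work using the handle, which is the
                 * stall the commit message describes.
                 */
                gem_write(fd, b->handle, start, batch, len);
        }
}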