
[i-g-t,v2,2/2] tests/gem_eio: Add reset and unwedge stress testing

Message ID 20180403113644.6855-1-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived

Commit Message

Tvrtko Ursulin April 3, 2018, 11:36 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Reset and unwedge stress testing is supposed to trigger wedging or resets
at inconvenient times and then re-use the context, so that either the
context or the driver tracking might get confused and break.

v2:
 * Renamed for more sensible naming.
 * Added some comments to explain what the test is doing. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/gem_eio.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

Comments

Chris Wilson April 3, 2018, 11:51 a.m. UTC | #1
Quoting Tvrtko Ursulin (2018-04-03 12:36:44)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Reset and unwedge stress testing is supposed to trigger wedging or resets
> at inconvenient times and then re-use the context so either the context or
> driver tracking might get confused and break.
> 
> v2:
>  * Renamed for more sensible naming.
>  * Added some comments to explain what the test is doing. (Chris Wilson)

You bring shame unto my tests with such beauty.

> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
Antonio Argenziano April 3, 2018, 6:24 p.m. UTC | #2
On 03/04/18 04:36, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Reset and unwedge stress testing is supposed to trigger wedging or resets
> at inconvenient times and then re-use the context so either the context or
> driver tracking might get confused and break.
> 
> v2:
>   * Renamed for more sensible naming.
>   * Added some comments to explain what the test is doing. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/gem_eio.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 74 insertions(+)
> 
> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
> index b7c5047f0816..9599e73db736 100644
> --- a/tests/gem_eio.c
> +++ b/tests/gem_eio.c
> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, unsigned int wait)
>   	close(fd);
>   }
>   
> +/*
> + * Verify that we can submit and execute work after unwedging the GPU.
> + */
> +static void test_reset_stress(int fd, unsigned int flags)
> +{
> +	uint32_t ctx0 = gem_context_create(fd);
> +
> +	igt_until_timeout(5) {
> +		struct drm_i915_gem_execbuffer2 execbuf = { };
> +		struct drm_i915_gem_exec_object2 obj = { };
> +		uint32_t bbe = MI_BATCH_BUFFER_END;
> +		igt_spin_t *hang;
> +		unsigned int i;
> +		uint32_t ctx;
> +
> +		gem_quiescent_gpu(fd);
> +
> +		igt_require(i915_reset_control(flags & TEST_WEDGE ?
> +					       false : true));
> +
> +		ctx = context_create_safe(fd);
> +
> +		/*
> +		 * Start executing a spin batch with some queued batches
> +		 * against a different context after it.
> +		 */

Aren't all batches queued on ctx0? Or is this a reference to the check 
on ctx you have later in the test?

Thanks,
Antonio

> +		hang = spin_sync(fd, ctx0, 0);
> +
> +		obj.handle = gem_create(fd, 4096);
> +		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
> +
> +		execbuf.buffers_ptr = to_user_pointer(&obj);
> +		execbuf.buffer_count = 1;
> +		execbuf.rsvd1 = ctx0;
> +
> +		for (i = 0; i < 10; i++)
> +			gem_execbuf(fd, &execbuf);
> +
> +		/* Wedge after a small delay. */
> +		igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
> +
> +		/* Unwedge by forcing a reset. */
> +		igt_assert(i915_reset_control(true));
> +		trigger_reset(fd);
> +
> +		gem_quiescent_gpu(fd);
> +
> +		/*
> +		 * Verify that we are able to submit work after unwedging from
> +		 * both contexts.
> +		 */
> +		execbuf.rsvd1 = ctx;
> +		for (i = 0; i < 5; i++)
> +			gem_execbuf(fd, &execbuf);
> +
> +		execbuf.rsvd1 = ctx0;
> +		for (i = 0; i < 5; i++)
> +			gem_execbuf(fd, &execbuf);
> +
> +		gem_sync(fd, obj.handle);
> +		igt_spin_batch_free(fd, hang);
> +		gem_context_destroy(fd, ctx);
> +		gem_close(fd, obj.handle);
> +	}
> +
> +	gem_context_destroy(fd, ctx0);
> +}
> +
>   static int fd = -1;
>   
>   static void
> @@ -635,6 +703,12 @@ igt_main
>   	igt_subtest("in-flight-suspend")
>   		test_inflight_suspend(fd);
>   
> +	igt_subtest("reset-stress")
> +		test_reset_stress(fd, 0);
> +
> +	igt_subtest("unwedge-stress")
> +		test_reset_stress(fd, TEST_WEDGE);
> +
>   	igt_subtest_group {
>   		const struct {
>   			unsigned int wait;
>
Antonio Argenziano April 3, 2018, 6:34 p.m. UTC | #3
On 03/04/18 11:24, Antonio Argenziano wrote:
> 
> 
> On 03/04/18 04:36, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Reset and unwedge stress testing is supposed to trigger wedging or resets
>> at inconvenient times and then re-use the context so either the context or
>> driver tracking might get confused and break.
>>
>> v2:
>>   * Renamed for more sensible naming.
>>   * Added some comments to explain what the test is doing. (Chris Wilson)
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   tests/gem_eio.c | 74 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 74 insertions(+)
>>
>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
>> index b7c5047f0816..9599e73db736 100644
>> --- a/tests/gem_eio.c
>> +++ b/tests/gem_eio.c
>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
>> unsigned int wait)
>>       close(fd);
>>   }
>> +/*
>> + * Verify that we can submit and execute work after unwedging the GPU.
>> + */
>> +static void test_reset_stress(int fd, unsigned int flags)
>> +{
>> +    uint32_t ctx0 = gem_context_create(fd);
>> +
>> +    igt_until_timeout(5) {
>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
>> +        struct drm_i915_gem_exec_object2 obj = { };
>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
>> +        igt_spin_t *hang;
>> +        unsigned int i;
>> +        uint32_t ctx;
>> +
>> +        gem_quiescent_gpu(fd);
>> +
>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
>> +                           false : true));
>> +
>> +        ctx = context_create_safe(fd);
>> +
>> +        /*
>> +         * Start executing a spin batch with some queued batches
>> +         * against a different context after it.
>> +         */
> 
> Aren't all batches queued on ctx0? Or is this a reference to the check 
> on ctx you have later in the test.
> 
> Thanks,
> Antonio
> 
>> +        hang = spin_sync(fd, ctx0, 0);

I think you meant to send this^ on ctx.

Antonio.

>> +
>> +        obj.handle = gem_create(fd, 4096);
>> +        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>> +
>> +        execbuf.buffers_ptr = to_user_pointer(&obj);
>> +        execbuf.buffer_count = 1;
>> +        execbuf.rsvd1 = ctx0;
>> +
>> +        for (i = 0; i < 10; i++)
>> +            gem_execbuf(fd, &execbuf);
>> +
>> +        /* Wedge after a small delay. */
>> +        igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
>> +
>> +        /* Unwedge by forcing a reset. */
>> +        igt_assert(i915_reset_control(true));
>> +        trigger_reset(fd);
>> +
>> +        gem_quiescent_gpu(fd);
>> +
>> +        /*
>> +         * Verify that we are able to submit work after unwedging from
>> +         * both contexts.
>> +         */
>> +        execbuf.rsvd1 = ctx;
>> +        for (i = 0; i < 5; i++)
>> +            gem_execbuf(fd, &execbuf);
>> +
>> +        execbuf.rsvd1 = ctx0;
>> +        for (i = 0; i < 5; i++)
>> +            gem_execbuf(fd, &execbuf);
>> +
>> +        gem_sync(fd, obj.handle);
>> +        igt_spin_batch_free(fd, hang);
>> +        gem_context_destroy(fd, ctx);
>> +        gem_close(fd, obj.handle);
>> +    }
>> +
>> +    gem_context_destroy(fd, ctx0);
>> +}
>> +
>>   static int fd = -1;
>>   static void
>> @@ -635,6 +703,12 @@ igt_main
>>       igt_subtest("in-flight-suspend")
>>           test_inflight_suspend(fd);
>> +    igt_subtest("reset-stress")
>> +        test_reset_stress(fd, 0);
>> +
>> +    igt_subtest("unwedge-stress")
>> +        test_reset_stress(fd, TEST_WEDGE);
>> +
>>       igt_subtest_group {
>>           const struct {
>>               unsigned int wait;
>>
Tvrtko Ursulin April 4, 2018, 9:58 a.m. UTC | #4
On 03/04/2018 19:34, Antonio Argenziano wrote:
> 
> 
> On 03/04/18 11:24, Antonio Argenziano wrote:
>>
>>
>> On 03/04/18 04:36, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>> Reset and unwedge stress testing is supposed to trigger wedging or 
>>> resets
>>> at inconvenient times and then re-use the context so either the 
>>> context or
>>> driver tracking might get confused and break.
>>>
>>> v2:
>>>   * Renamed for more sensible naming.
>>>   * Added some comments to explain what the test is doing. (Chris 
>>> Wilson)
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>   tests/gem_eio.c | 74 
>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 74 insertions(+)
>>>
>>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
>>> index b7c5047f0816..9599e73db736 100644
>>> --- a/tests/gem_eio.c
>>> +++ b/tests/gem_eio.c
>>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
>>> unsigned int wait)
>>>       close(fd);
>>>   }
>>> +/*
>>> + * Verify that we can submit and execute work after unwedging the GPU.
>>> + */
>>> +static void test_reset_stress(int fd, unsigned int flags)
>>> +{
>>> +    uint32_t ctx0 = gem_context_create(fd);
>>> +
>>> +    igt_until_timeout(5) {
>>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
>>> +        struct drm_i915_gem_exec_object2 obj = { };
>>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +        igt_spin_t *hang;
>>> +        unsigned int i;
>>> +        uint32_t ctx;
>>> +
>>> +        gem_quiescent_gpu(fd);
>>> +
>>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
>>> +                           false : true));
>>> +
>>> +        ctx = context_create_safe(fd);
>>> +
>>> +        /*
>>> +         * Start executing a spin batch with some queued batches
>>> +         * against a different context after it.
>>> +         */
>>
>> Aren't all batches queued on ctx0? Or is this a reference to the check 
>> on ctx you have later in the test.

Yes, a mistake in comment text.

>>> +        hang = spin_sync(fd, ctx0, 0);
> 
> I think you meant to send this^ on ctx.

Why do you think so? Did you find a different or better way to trigger 
the bug this test is trying to hit?

Regards,

Tvrtko

> Antonio.
> 
>>> +
>>> +        obj.handle = gem_create(fd, 4096);
>>> +        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>>> +
>>> +        execbuf.buffers_ptr = to_user_pointer(&obj);
>>> +        execbuf.buffer_count = 1;
>>> +        execbuf.rsvd1 = ctx0;
>>> +
>>> +        for (i = 0; i < 10; i++)
>>> +            gem_execbuf(fd, &execbuf);
>>> +
>>> +        /* Wedge after a small delay. */
>>> +        igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
>>> +
>>> +        /* Unwedge by forcing a reset. */
>>> +        igt_assert(i915_reset_control(true));
>>> +        trigger_reset(fd);
>>> +
>>> +        gem_quiescent_gpu(fd);
>>> +
>>> +        /*
>>> +         * Verify that we are able to submit work after unwedging from
>>> +         * both contexts.
>>> +         */
>>> +        execbuf.rsvd1 = ctx;
>>> +        for (i = 0; i < 5; i++)
>>> +            gem_execbuf(fd, &execbuf);
>>> +
>>> +        execbuf.rsvd1 = ctx0;
>>> +        for (i = 0; i < 5; i++)
>>> +            gem_execbuf(fd, &execbuf);
>>> +
>>> +        gem_sync(fd, obj.handle);
>>> +        igt_spin_batch_free(fd, hang);
>>> +        gem_context_destroy(fd, ctx);
>>> +        gem_close(fd, obj.handle);
>>> +    }
>>> +
>>> +    gem_context_destroy(fd, ctx0);
>>> +}
>>> +
>>>   static int fd = -1;
>>>   static void
>>> @@ -635,6 +703,12 @@ igt_main
>>>       igt_subtest("in-flight-suspend")
>>>           test_inflight_suspend(fd);
>>> +    igt_subtest("reset-stress")
>>> +        test_reset_stress(fd, 0);
>>> +
>>> +    igt_subtest("unwedge-stress")
>>> +        test_reset_stress(fd, TEST_WEDGE);
>>> +
>>>       igt_subtest_group {
>>>           const struct {
>>>               unsigned int wait;
>>>
Chris Wilson April 4, 2018, 10:06 a.m. UTC | #5
Quoting Tvrtko Ursulin (2018-04-04 10:58:14)
> 
> On 03/04/2018 19:34, Antonio Argenziano wrote:
> > 
> > 
> > On 03/04/18 11:24, Antonio Argenziano wrote:
> >>
> >>
> >> On 03/04/18 04:36, Tvrtko Ursulin wrote:
> >>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>
> >>> Reset and unwedge stress testing is supposed to trigger wedging or 
> >>> resets
> >>> at inconvenient times and then re-use the context so either the 
> >>> context or
> >>> driver tracking might get confused and break.
> >>>
> >>> v2:
> >>>   * Renamed for more sensible naming.
> >>>   * Added some comments to explain what the test is doing. (Chris 
> >>> Wilson)
> >>>
> >>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>   tests/gem_eio.c | 74 
> >>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>   1 file changed, 74 insertions(+)
> >>>
> >>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
> >>> index b7c5047f0816..9599e73db736 100644
> >>> --- a/tests/gem_eio.c
> >>> +++ b/tests/gem_eio.c
> >>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
> >>> unsigned int wait)
> >>>       close(fd);
> >>>   }
> >>> +/*
> >>> + * Verify that we can submit and execute work after unwedging the GPU.
> >>> + */
> >>> +static void test_reset_stress(int fd, unsigned int flags)
> >>> +{
> >>> +    uint32_t ctx0 = gem_context_create(fd);
> >>> +
> >>> +    igt_until_timeout(5) {
> >>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
> >>> +        struct drm_i915_gem_exec_object2 obj = { };
> >>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
> >>> +        igt_spin_t *hang;
> >>> +        unsigned int i;
> >>> +        uint32_t ctx;
> >>> +
> >>> +        gem_quiescent_gpu(fd);
> >>> +
> >>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
> >>> +                           false : true));
> >>> +
> >>> +        ctx = context_create_safe(fd);
> >>> +
> >>> +        /*
> >>> +         * Start executing a spin batch with some queued batches
> >>> +         * against a different context after it.
> >>> +         */
> >>
> >> Aren't all batches queued on ctx0? Or is this a reference to the check 
> >> on ctx you have later in the test.
> 
> Yes, a mistake in comment text.
> 
> >>> +        hang = spin_sync(fd, ctx0, 0);
> > 
> > I think you meant to send this^ on ctx.
> 
> Why do you think so? Did you find a different or better way to trigger 
> the bug this test is trying to hit?

You might need to explain that this test was trying to reproduce a
kernel bug around unwedging that you found earlier, and instead managed to
find a similar one. ;)
-Chris
Antonio Argenziano April 4, 2018, 4:54 p.m. UTC | #6
On 04/04/18 02:58, Tvrtko Ursulin wrote:
> 
> On 03/04/2018 19:34, Antonio Argenziano wrote:
>>
>>
>> On 03/04/18 11:24, Antonio Argenziano wrote:
>>>
>>>
>>> On 03/04/18 04:36, Tvrtko Ursulin wrote:
>>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>
>>>> Reset and unwedge stress testing is supposed to trigger wedging or 
>>>> resets
>>>> at inconvenient times and then re-use the context so either the 
>>>> context or
>>>> driver tracking might get confused and break.
>>>>
>>>> v2:
>>>>   * Renamed for more sensible naming.
>>>>   * Added some comments to explain what the test is doing. (Chris 
>>>> Wilson)
>>>>
>>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>> ---
>>>>   tests/gem_eio.c | 74 
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>   1 file changed, 74 insertions(+)
>>>>
>>>> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
>>>> index b7c5047f0816..9599e73db736 100644
>>>> --- a/tests/gem_eio.c
>>>> +++ b/tests/gem_eio.c
>>>> @@ -591,6 +591,74 @@ static void test_inflight_internal(int fd, 
>>>> unsigned int wait)
>>>>       close(fd);
>>>>   }
>>>> +/*
>>>> + * Verify that we can submit and execute work after unwedging the GPU.
>>>> + */
>>>> +static void test_reset_stress(int fd, unsigned int flags)
>>>> +{
>>>> +    uint32_t ctx0 = gem_context_create(fd);
>>>> +
>>>> +    igt_until_timeout(5) {
>>>> +        struct drm_i915_gem_execbuffer2 execbuf = { };
>>>> +        struct drm_i915_gem_exec_object2 obj = { };
>>>> +        uint32_t bbe = MI_BATCH_BUFFER_END;
>>>> +        igt_spin_t *hang;
>>>> +        unsigned int i;
>>>> +        uint32_t ctx;
>>>> +
>>>> +        gem_quiescent_gpu(fd);
>>>> +
>>>> +        igt_require(i915_reset_control(flags & TEST_WEDGE ?
>>>> +                           false : true));
>>>> +
>>>> +        ctx = context_create_safe(fd);
>>>> +
>>>> +        /*
>>>> +         * Start executing a spin batch with some queued batches
>>>> +         * against a different context after it.
>>>> +         */
>>>
>>> Aren't all batches queued on ctx0? Or is this a reference to the 
>>> check on ctx you have later in the test.
> 
> Yes, a mistake in comment text.
> 
>>>> +        hang = spin_sync(fd, ctx0, 0);
>>
>> I think you meant to send this^ on ctx.
> 
> Why do you think so? Did you find a different or better way to trigger 
> the bug this test is trying to hit?

Nope, I just misunderstood the code :). I thought you were creating ctx 
as 'safe', i.e. not 'bannable', because you were going to reuse the same 
context across multiple resets and didn't want it to be banned. BTW, 
given that this is not the case, wouldn't ctx0 be banned after so many 
resets?

Apologies for the cryptic comment,
Antonio.
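
For reference, i915 exposes a per-context bannable flag through the
context-param ioctl. The sketch below shows one way to create a
non-bannable context and is presumably close to what context_create_safe()
in gem_eio.c does; the helper name and the exact set of parameters are an
assumption here, not the actual implementation.

	/*
	 * Illustrative sketch only: create_unbannable_context() is a
	 * hypothetical helper, assumed to behave roughly like
	 * context_create_safe() by clearing the bannable flag.
	 */
	#include "igt.h"

	static uint32_t create_unbannable_context(int fd)
	{
		struct drm_i915_gem_context_param param = { };
		uint32_t ctx = gem_context_create(fd);

		/* Ask i915 never to ban this context, however often it hangs. */
		param.ctx_id = ctx;
		param.param = I915_CONTEXT_PARAM_BANNABLE;
		param.value = 0;
		gem_context_set_param(fd, &param);

		return ctx;
	}

If that is indeed what the helper does, only ctx0 (created with a plain
gem_context_create()) would remain subject to the default ban policy,
which is what the question above is getting at.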

> 
> Regards,
> 
> Tvrtko
> 
>> Antonio.
>>
>>>> +
>>>> +        obj.handle = gem_create(fd, 4096);
>>>> +        gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
>>>> +
>>>> +        execbuf.buffers_ptr = to_user_pointer(&obj);
>>>> +        execbuf.buffer_count = 1;
>>>> +        execbuf.rsvd1 = ctx0;
>>>> +
>>>> +        for (i = 0; i < 10; i++)
>>>> +            gem_execbuf(fd, &execbuf);
>>>> +
>>>> +        /* Wedge after a small delay. */
>>>> +        igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
>>>> +
>>>> +        /* Unwedge by forcing a reset. */
>>>> +        igt_assert(i915_reset_control(true));
>>>> +        trigger_reset(fd);
>>>> +
>>>> +        gem_quiescent_gpu(fd);
>>>> +
>>>> +        /*
>>>> +         * Verify that we are able to submit work after unwedging from
>>>> +         * both contexts.
>>>> +         */
>>>> +        execbuf.rsvd1 = ctx;
>>>> +        for (i = 0; i < 5; i++)
>>>> +            gem_execbuf(fd, &execbuf);
>>>> +
>>>> +        execbuf.rsvd1 = ctx0;
>>>> +        for (i = 0; i < 5; i++)
>>>> +            gem_execbuf(fd, &execbuf);
>>>> +
>>>> +        gem_sync(fd, obj.handle);
>>>> +        igt_spin_batch_free(fd, hang);
>>>> +        gem_context_destroy(fd, ctx);
>>>> +        gem_close(fd, obj.handle);
>>>> +    }
>>>> +
>>>> +    gem_context_destroy(fd, ctx0);
>>>> +}
>>>> +
>>>>   static int fd = -1;
>>>>   static void
>>>> @@ -635,6 +703,12 @@ igt_main
>>>>       igt_subtest("in-flight-suspend")
>>>>           test_inflight_suspend(fd);
>>>> +    igt_subtest("reset-stress")
>>>> +        test_reset_stress(fd, 0);
>>>> +
>>>> +    igt_subtest("unwedge-stress")
>>>> +        test_reset_stress(fd, TEST_WEDGE);
>>>> +
>>>>       igt_subtest_group {
>>>>           const struct {
>>>>               unsigned int wait;
>>>>

Patch

diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index b7c5047f0816..9599e73db736 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -591,6 +591,74 @@  static void test_inflight_internal(int fd, unsigned int wait)
 	close(fd);
 }
 
+/*
+ * Verify that we can submit and execute work after unwedging the GPU.
+ */
+static void test_reset_stress(int fd, unsigned int flags)
+{
+	uint32_t ctx0 = gem_context_create(fd);
+
+	igt_until_timeout(5) {
+		struct drm_i915_gem_execbuffer2 execbuf = { };
+		struct drm_i915_gem_exec_object2 obj = { };
+		uint32_t bbe = MI_BATCH_BUFFER_END;
+		igt_spin_t *hang;
+		unsigned int i;
+		uint32_t ctx;
+
+		gem_quiescent_gpu(fd);
+
+		igt_require(i915_reset_control(flags & TEST_WEDGE ?
+					       false : true));
+
+		ctx = context_create_safe(fd);
+
+		/*
+		 * Start executing a spin batch with some queued batches
+		 * against a different context after it.
+		 */
+		hang = spin_sync(fd, ctx0, 0);
+
+		obj.handle = gem_create(fd, 4096);
+		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+
+		execbuf.buffers_ptr = to_user_pointer(&obj);
+		execbuf.buffer_count = 1;
+		execbuf.rsvd1 = ctx0;
+
+		for (i = 0; i < 10; i++)
+			gem_execbuf(fd, &execbuf);
+
+		/* Wedge after a small delay. */
+		igt_assert_eq(__check_wait(fd, obj.handle, 100e3), 0);
+
+		/* Unwedge by forcing a reset. */
+		igt_assert(i915_reset_control(true));
+		trigger_reset(fd);
+
+		gem_quiescent_gpu(fd);
+
+		/*
+		 * Verify that we are able to submit work after unwedging from
+		 * both contexts.
+		 */
+		execbuf.rsvd1 = ctx;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		execbuf.rsvd1 = ctx0;
+		for (i = 0; i < 5; i++)
+			gem_execbuf(fd, &execbuf);
+
+		gem_sync(fd, obj.handle);
+		igt_spin_batch_free(fd, hang);
+		gem_context_destroy(fd, ctx);
+		gem_close(fd, obj.handle);
+	}
+
+	gem_context_destroy(fd, ctx0);
+}
+
 static int fd = -1;
 
 static void
@@ -635,6 +703,12 @@  igt_main
 	igt_subtest("in-flight-suspend")
 		test_inflight_suspend(fd);
 
+	igt_subtest("reset-stress")
+		test_reset_stress(fd, 0);
+
+	igt_subtest("unwedge-stress")
+		test_reset_stress(fd, TEST_WEDGE);
+
 	igt_subtest_group {
 		const struct {
 			unsigned int wait;