diff mbox

drm/i915/selftests: Wait for the dma-fence timeout

Message ID 20180117121546.6023-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Jan. 17, 2018, 12:15 p.m. UTC
When testing that the timeout fired, we need to be sure we have waited
just long enough for the timeout to have occurred and for the softirq
(on another cpu) to have completed. Sleeping for an arbitrary amount is
prone to error, so wait for the timeout instead and complain if it was
too late.

References: https://bugs.freedesktop.org/show_bug.cgi?id=104670
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/selftests/i915_sw_fence.c | 30 +++++++++-----------------
 1 file changed, 10 insertions(+), 20 deletions(-)

Comments

Tvrtko Ursulin Jan. 17, 2018, 12:27 p.m. UTC | #1
On 17/01/2018 12:15, Chris Wilson wrote:
> When testing that the timeout fired, we need to be sure we have waited
> just long enough for the timeout to have occurred and for the softirq
> (on another cpu) to have completed. Sleeping for an arbitrary amount is
> prone to error, so wait for the timeout instead and complain if it was
> too late.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=104670
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/selftests/i915_sw_fence.c | 30 +++++++++-----------------
>   1 file changed, 10 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
> index 4fb51deb81a1..f171db16a1c4 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
> @@ -661,7 +661,7 @@ static int test_dma_fence(void *arg)
>   {
>   	struct i915_sw_fence *timeout = NULL, *not = NULL;
>   	unsigned long delay = i915_selftest.timeout_jiffies;
> -	unsigned long end, sleep;
> +	unsigned long start, end;
>   	struct dma_fence *dma;
>   	int err;
>   
> @@ -688,36 +688,26 @@ static int test_dma_fence(void *arg)
>   	}
>   
>   	/* We round the timeout for the fence up to the next second */
> -	end = round_jiffies_up(jiffies + delay);
> +	start = jiffies;
> +	end = round_jiffies_up(start + delay);
>   
> -	sleep = jiffies_to_usecs(delay) / 3;
> -	usleep_range(sleep, 2 * sleep);
> -	if (time_after(jiffies, end)) {
> -		pr_debug("Slept too long, delay=%lu, skipping!\n", delay);
> -		goto skip;
> +	if (wait_event_interruptible(timeout->wait,
> +				     i915_sw_fence_done(timeout))) {
> +		err = -EINTR;
> +		goto err;

But can now hang forever if broken.

How about short sleeps in a loop until a timeout?

Regards,

Tvrtko

>   	}
>   
> -	if (i915_sw_fence_done(timeout) || i915_sw_fence_done(not)) {
> -		pr_err("Fences signaled too early\n");
> +	if (time_after(jiffies, 2*end - start)) {
> +		pr_err("Timeout (now %lu) later than expected (%lu)\n",
> +		       jiffies, end);
>   		goto err;
>   	}
>   
> -	do {
> -		sleep = jiffies_to_usecs(end - jiffies + 1);
> -		usleep_range(sleep, 2 * sleep);
> -	} while (!time_after(jiffies, end));
> -
>   	if (i915_sw_fence_done(not)) {
>   		pr_err("No timeout fence signaled!\n");
>   		goto err;
>   	}
>   
> -	if (!i915_sw_fence_done(timeout)) {
> -		pr_err("Timeout fence unsignaled!\n");
> -		goto err;
> -	}
> -
> -skip:
>   	dma_fence_signal(dma);
>   
>   	if (!i915_sw_fence_done(timeout) || !i915_sw_fence_done(not)) {
>
Chris Wilson Jan. 17, 2018, 12:44 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-01-17 12:27:50)
> 
> On 17/01/2018 12:15, Chris Wilson wrote:
> > When testing that the timeout fired, we need to be sure we have waited
> > just long enough for the timeout to have occurred and for the softirq
> > (on another cpu) to have completed. Sleeping for an arbitrary amount is
> > prone to error, so wait for the timeout instead and complain if it was
> > too late.
> > 
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=104670
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/selftests/i915_sw_fence.c | 30 +++++++++-----------------
> >   1 file changed, 10 insertions(+), 20 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
> > index 4fb51deb81a1..f171db16a1c4 100644
> > --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
> > +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
> > @@ -661,7 +661,7 @@ static int test_dma_fence(void *arg)
> >   {
> >       struct i915_sw_fence *timeout = NULL, *not = NULL;
> >       unsigned long delay = i915_selftest.timeout_jiffies;
> > -     unsigned long end, sleep;
> > +     unsigned long start, end;
> >       struct dma_fence *dma;
> >       int err;
> >   
> > @@ -688,36 +688,26 @@ static int test_dma_fence(void *arg)
> >       }
> >   
> >       /* We round the timeout for the fence up to the next second */
> > -     end = round_jiffies_up(jiffies + delay);
> > +     start = jiffies;
> > +     end = round_jiffies_up(start + delay);
> >   
> > -     sleep = jiffies_to_usecs(delay) / 3;
> > -     usleep_range(sleep, 2 * sleep);
> > -     if (time_after(jiffies, end)) {
> > -             pr_debug("Slept too long, delay=%lu, skipping!\n", delay);
> > -             goto skip;
> > +     if (wait_event_interruptible(timeout->wait,
> > +                                  i915_sw_fence_done(timeout))) {
> > +             err = -EINTR;
> > +             goto err;
> 
> But can now hang forever if broken.

Which is why I allowed it to be interrupted. Maybe wait_event_killable()?

> How about short sleeps in a loop until a timeout?

I kind of like the idea of using the waitqueue now :)

How about if I attach a timer to kick the waitqueue instead.
-Chris
Tvrtko Ursulin Jan. 17, 2018, 12:54 p.m. UTC | #3
On 17/01/2018 12:44, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-01-17 12:27:50)
>>
>> On 17/01/2018 12:15, Chris Wilson wrote:
>>> When testing that the timeout fired, we need to be sure we have waited
>>> just long enough for the timeout to have occurred and for the softirq
>>> (on another cpu) to have completed. Sleeping for an arbitrary amount is
>>> prone to error, so wait for the timeout instead and complain if it was
>>> too late.
>>>
>>> References: https://bugs.freedesktop.org/show_bug.cgi?id=104670
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/selftests/i915_sw_fence.c | 30 +++++++++-----------------
>>>    1 file changed, 10 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
>>> index 4fb51deb81a1..f171db16a1c4 100644
>>> --- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
>>> +++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
>>> @@ -661,7 +661,7 @@ static int test_dma_fence(void *arg)
>>>    {
>>>        struct i915_sw_fence *timeout = NULL, *not = NULL;
>>>        unsigned long delay = i915_selftest.timeout_jiffies;
>>> -     unsigned long end, sleep;
>>> +     unsigned long start, end;
>>>        struct dma_fence *dma;
>>>        int err;
>>>    
>>> @@ -688,36 +688,26 @@ static int test_dma_fence(void *arg)
>>>        }
>>>    
>>>        /* We round the timeout for the fence up to the next second */
>>> -     end = round_jiffies_up(jiffies + delay);
>>> +     start = jiffies;
>>> +     end = round_jiffies_up(start + delay);
>>>    
>>> -     sleep = jiffies_to_usecs(delay) / 3;
>>> -     usleep_range(sleep, 2 * sleep);
>>> -     if (time_after(jiffies, end)) {
>>> -             pr_debug("Slept too long, delay=%lu, skipping!\n", delay);
>>> -             goto skip;
>>> +     if (wait_event_interruptible(timeout->wait,
>>> +                                  i915_sw_fence_done(timeout))) {
>>> +             err = -EINTR;
>>> +             goto err;
>>
>> But can now hang forever if broken.
> 
> Which is why I allowed it to be interrupted. Maybe wait_event_killable()?
> 
>> How about short sleeps in a loop until a timeout?
> 
> I kind of like the idea of using the waitqueue now :)
> 
> How about if I attach a timer to kick the waitqueue instead.

I would hate to be the trigger of overkills. But I think it is nice if 
the test times out on it's own. So either option is fine by me. (Short 
sleep polling loop, or timer kick with timeout checking.)

Regards,

Tvrtko
Saarinen, Jani Jan. 17, 2018, 1:29 p.m. UTC | #4
HI, 
> -----Original Message-----

> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of

> Patchwork

> Sent: keskiviikko 17. tammikuuta 2018 15.26

> To: Chris Wilson <chris@chris-wilson.co.uk>

> Cc: intel-gfx@lists.freedesktop.org

> Subject: [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/selftests: Wait for the

> dma-fence timeout

> 

> == Series Details ==

> 

> Series: drm/i915/selftests: Wait for the dma-fence timeout

> URL   : https://patchwork.freedesktop.org/series/36610/

> State : success

> 

> == Summary ==

> 

> Series 36610v1 drm/i915/selftests: Wait for the dma-fence timeout

> https://patchwork.freedesktop.org/api/1.0/series/36610/revisions/1/mbox/

> 

> fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:421s

> fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24

> time:427s

> fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:374s

> fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:282s

> fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:482s

> fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:487s

> fi-elk-e7500     total:224  pass:168  dwarn:9   dfail:1   fail:0   skip:45

> fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:515s

> fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:393s

> fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:401s

> fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:464s

> fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:419s

> fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:465s

> fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:501s

> fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:453s

> fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:501s

> fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:427s

> fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:516s

> fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:529s

> fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:492s

> fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:477s

> fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:432s

> fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:518s

> fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:406s

> Blacklisted hosts:

> fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:573s

> fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:473s

> fi-bsw-n3050 failed to connect after reboot

> fi-byt-j1900 failed to connect after reboot

> fi-byt-n2820 failed to connect after reboot

> fi-gdg-551 failed to connect after reboot

> fi-ilk-650 failed to connect after reboot

> fi-pnv-d510 failed to connect after reboot

These due to series or some other issue? 
> 

> a5cf4f0319302df5036145fd6c96d6966cc677ae drm-tip: 2018y-01m-17d-09h-

> 14m-44s UTC integration manifest

> 45d7504b5ec6 drm/i915/selftests: Wait for the dma-fence timeout

> 

> == Logs ==

> 

> For more details see: https://intel-gfx-ci.01.org/tree/drm-

> tip/Patchwork_7694/issues.html

> _______________________________________________

> Intel-gfx mailing list

> Intel-gfx@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Jan. 17, 2018, 1:31 p.m. UTC | #5
Quoting Saarinen, Jani (2018-01-17 13:29:13)
> HI, 
> > -----Original Message-----
> > From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of
> > Patchwork
> > Sent: keskiviikko 17. tammikuuta 2018 15.26
> > To: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: intel-gfx@lists.freedesktop.org
> > Subject: [Intel-gfx] ✓ Fi.CI.BAT: success for drm/i915/selftests: Wait for the
> > dma-fence timeout
> > 
> > == Series Details ==
> > 
> > Series: drm/i915/selftests: Wait for the dma-fence timeout
> > URL   : https://patchwork.freedesktop.org/series/36610/
> > State : success
> > 
> > == Summary ==
> > 
> > Series 36610v1 drm/i915/selftests: Wait for the dma-fence timeout
> > https://patchwork.freedesktop.org/api/1.0/series/36610/revisions/1/mbox/
> > 
> > fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:421s
> > fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24
> > time:427s
> > fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:374s
> > fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:282s
> > fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:482s
> > fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:487s
> > fi-elk-e7500     total:224  pass:168  dwarn:9   dfail:1   fail:0   skip:45
> > fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:515s
> > fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:393s
> > fi-hsw-4770r     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:401s
> > fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:464s
> > fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:419s
> > fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:465s
> > fi-kbl-7560u     total:288  pass:269  dwarn:0   dfail:0   fail:0   skip:19  time:501s
> > fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:453s
> > fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:501s
> > fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:427s
> > fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:516s
> > fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:529s
> > fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:492s
> > fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:477s
> > fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:432s
> > fi-snb-2520m     total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:518s
> > fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:406s
> > Blacklisted hosts:
> > fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:573s
> > fi-glk-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:473s
> > fi-bsw-n3050 failed to connect after reboot
> > fi-byt-j1900 failed to connect after reboot
> > fi-byt-n2820 failed to connect after reboot
> > fi-gdg-551 failed to connect after reboot
> > fi-ilk-650 failed to connect after reboot
> > fi-pnv-d510 failed to connect after reboot
> These due to series or some other issue? 

Highly unlikely to be this patch as it only touches code inside
selftests/ which is not run.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
index 4fb51deb81a1..f171db16a1c4 100644
--- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
@@ -661,7 +661,7 @@  static int test_dma_fence(void *arg)
 {
 	struct i915_sw_fence *timeout = NULL, *not = NULL;
 	unsigned long delay = i915_selftest.timeout_jiffies;
-	unsigned long end, sleep;
+	unsigned long start, end;
 	struct dma_fence *dma;
 	int err;
 
@@ -688,36 +688,26 @@  static int test_dma_fence(void *arg)
 	}
 
 	/* We round the timeout for the fence up to the next second */
-	end = round_jiffies_up(jiffies + delay);
+	start = jiffies;
+	end = round_jiffies_up(start + delay);
 
-	sleep = jiffies_to_usecs(delay) / 3;
-	usleep_range(sleep, 2 * sleep);
-	if (time_after(jiffies, end)) {
-		pr_debug("Slept too long, delay=%lu, skipping!\n", delay);
-		goto skip;
+	if (wait_event_interruptible(timeout->wait,
+				     i915_sw_fence_done(timeout))) {
+		err = -EINTR;
+		goto err;
 	}
 
-	if (i915_sw_fence_done(timeout) || i915_sw_fence_done(not)) {
-		pr_err("Fences signaled too early\n");
+	if (time_after(jiffies, 2*end - start)) {
+		pr_err("Timeout (now %lu) later than expected (%lu)\n",
+		       jiffies, end);
 		goto err;
 	}
 
-	do {
-		sleep = jiffies_to_usecs(end - jiffies + 1);
-		usleep_range(sleep, 2 * sleep);
-	} while (!time_after(jiffies, end));
-
 	if (i915_sw_fence_done(not)) {
 		pr_err("No timeout fence signaled!\n");
 		goto err;
 	}
 
-	if (!i915_sw_fence_done(timeout)) {
-		pr_err("Timeout fence unsignaled!\n");
-		goto err;
-	}
-
-skip:
 	dma_fence_signal(dma);
 
 	if (!i915_sw_fence_done(timeout) || !i915_sw_fence_done(not)) {