diff mbox

drm/i915: Fix timeout with missed interrupts in __wait_seqno

Message ID 1386687763-21777-1-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala Dec. 10, 2013, 3:02 p.m. UTC
Commit 094f9a54e355 ("drm/i915: Fix __wait_seqno to use true infinite
timeouts") added support for __wait_seqno to detect missing interrupts and
go around them by polling. As there is also timeout detection in
__wait_seqno, the polling and timeout detection were done with the same
timer.

When there has been missed interrupts and polling is needed, the timer is
set to trigger in (now + 1) jiffies in future, instead of the caller
specified timeout.

Now when io_schedule() returns, we calculate the jiffies left to timeout
using the timer expiration value. As the current jiffies is now bound to be
always equal or greater than the expiration value, the timeout_jiffies will
become zero or negative and we return -ETIME to caller even tho the
timeout was never reached.

Fix this by decoupling timeout calculation from timer expiration.

v2: Commit message with some sense in it (Chris Wilson)

v3: add parenthesis on timeout_expire calculation

v4: don't read jiffies without timeout (Chris Wilson)

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c |   14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

Comments

Ville Syrjälä Dec. 12, 2013, 12:48 p.m. UTC | #1
On Tue, Dec 10, 2013 at 05:02:43PM +0200, Mika Kuoppala wrote:
> Commit 094f9a54e355 ("drm/i915: Fix __wait_seqno to use true infinite
> timeouts") added support for __wait_seqno to detect missing interrupts and
> go around them by polling. As there is also timeout detection in
> __wait_seqno, the polling and timeout detection were done with the same
> timer.
> 
> When there has been missed interrupts and polling is needed, the timer is
> set to trigger in (now + 1) jiffies in future, instead of the caller
> specified timeout.
> 
> Now when io_schedule() returns, we calculate the jiffies left to timeout
> using the timer expiration value. As the current jiffies is now bound to be
> always equal or greater than the expiration value, the timeout_jiffies will
> become zero or negative and we return -ETIME to caller even tho the
> timeout was never reached.
> 
> Fix this by decoupling timeout calculation from timer expiration.
> 
> v2: Commit message with some sense in it (Chris Wilson)
> 
> v3: add parenthesis on timeout_expire calculation
> 
> v4: don't read jiffies without timeout (Chris Wilson)
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c |   14 ++++++--------
>  1 file changed, 6 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 92149bc..6d2e786 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1017,7 +1017,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
>  	struct timespec before, now;
>  	DEFINE_WAIT(wait);
> -	long timeout_jiffies;
> +	unsigned long timeout_expire;
>  	int ret;
>  
>  	WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n");
> @@ -1025,7 +1025,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>  	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
>  		return 0;
>  
> -	timeout_jiffies = timeout ? timespec_to_jiffies_timeout(timeout) : 1;
> +	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
>  
>  	if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) {
>  		gen6_rps_boost(dev_priv);
> @@ -1044,7 +1044,6 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>  	getrawmonotonic(&before);
>  	for (;;) {
>  		struct timer_list timer;
> -		unsigned long expire;
>  
>  		prepare_to_wait(&ring->irq_queue, &wait,
>  				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
> @@ -1070,23 +1069,22 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
>  			break;
>  		}
>  
> -		if (timeout_jiffies <= 0) {
> +		if (timeout && time_after_eq(jiffies, timeout_expire)) {
>  			ret = -ETIME;
>  			break;
>  		}
>  
>  		timer.function = NULL;
>  		if (timeout || missed_irq(dev_priv, ring)) {
> +			unsigned long expire;
> +
>  			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
> -			expire = jiffies + (missed_irq(dev_priv, ring) ? 1: timeout_jiffies);
> +			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;

I guess we have very small race here if we get called w/ timeout==NULL, and
missed_irq() was true above but is no longer true here. At that point we would
set expire=0 and might end up waiting for quite a while. But that issue was
present already in the code before this patch and otherwise it all
looks good to me, so:

Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

>  			mod_timer(&timer, expire);
>  		}
>  
>  		io_schedule();
>  
> -		if (timeout)
> -			timeout_jiffies = expire - jiffies;
> -
>  		if (timer.function) {
>  			del_singleshot_timer_sync(&timer);
>  			destroy_timer_on_stack(&timer);
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter Dec. 12, 2013, 2:28 p.m. UTC | #2
On Thu, Dec 12, 2013 at 02:48:47PM +0200, Ville Syrjälä wrote:
> On Tue, Dec 10, 2013 at 05:02:43PM +0200, Mika Kuoppala wrote:
> > Commit 094f9a54e355 ("drm/i915: Fix __wait_seqno to use true infinite
> > timeouts") added support for __wait_seqno to detect missing interrupts and
> > go around them by polling. As there is also timeout detection in
> > __wait_seqno, the polling and timeout detection were done with the same
> > timer.
> > 
> > When there has been missed interrupts and polling is needed, the timer is
> > set to trigger in (now + 1) jiffies in future, instead of the caller
> > specified timeout.
> > 
> > Now when io_schedule() returns, we calculate the jiffies left to timeout
> > using the timer expiration value. As the current jiffies is now bound to be
> > always equal or greater than the expiration value, the timeout_jiffies will
> > become zero or negative and we return -ETIME to caller even tho the
> > timeout was never reached.
> > 
> > Fix this by decoupling timeout calculation from timer expiration.
> > 
> > v2: Commit message with some sense in it (Chris Wilson)
> > 
> > v3: add parenthesis on timeout_expire calculation
> > 
> > v4: don't read jiffies without timeout (Chris Wilson)
> > 
> > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c |   14 ++++++--------
> >  1 file changed, 6 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 92149bc..6d2e786 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1017,7 +1017,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
> >  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
> >  	struct timespec before, now;
> >  	DEFINE_WAIT(wait);
> > -	long timeout_jiffies;
> > +	unsigned long timeout_expire;
> >  	int ret;
> >  
> >  	WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n");
> > @@ -1025,7 +1025,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
> >  	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
> >  		return 0;
> >  
> > -	timeout_jiffies = timeout ? timespec_to_jiffies_timeout(timeout) : 1;
> > +	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
> >  
> >  	if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) {
> >  		gen6_rps_boost(dev_priv);
> > @@ -1044,7 +1044,6 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
> >  	getrawmonotonic(&before);
> >  	for (;;) {
> >  		struct timer_list timer;
> > -		unsigned long expire;
> >  
> >  		prepare_to_wait(&ring->irq_queue, &wait,
> >  				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
> > @@ -1070,23 +1069,22 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
> >  			break;
> >  		}
> >  
> > -		if (timeout_jiffies <= 0) {
> > +		if (timeout && time_after_eq(jiffies, timeout_expire)) {
> >  			ret = -ETIME;
> >  			break;
> >  		}
> >  
> >  		timer.function = NULL;
> >  		if (timeout || missed_irq(dev_priv, ring)) {
> > +			unsigned long expire;
> > +
> >  			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
> > -			expire = jiffies + (missed_irq(dev_priv, ring) ? 1: timeout_jiffies);
> > +			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
> 
> I guess we have very small race here if we get called w/ timeout==NULL, and
> missed_irq() was true above but is no longer true here. At that point we would
> set expire=0 and might end up waiting for quite a while. But that issue was
> present already in the code before this patch and otherwise it all
> looks good to me, so:

We shouldn't ever reset misseq_irq in normal operations, so this should be
ok.

> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

Queued for -next, thanks for the patch.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 92149bc..6d2e786 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1017,7 +1017,7 @@  static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct timespec before, now;
 	DEFINE_WAIT(wait);
-	long timeout_jiffies;
+	unsigned long timeout_expire;
 	int ret;
 
 	WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n");
@@ -1025,7 +1025,7 @@  static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
 		return 0;
 
-	timeout_jiffies = timeout ? timespec_to_jiffies_timeout(timeout) : 1;
+	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
 
 	if (dev_priv->info->gen >= 6 && can_wait_boost(file_priv)) {
 		gen6_rps_boost(dev_priv);
@@ -1044,7 +1044,6 @@  static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 	getrawmonotonic(&before);
 	for (;;) {
 		struct timer_list timer;
-		unsigned long expire;
 
 		prepare_to_wait(&ring->irq_queue, &wait,
 				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
@@ -1070,23 +1069,22 @@  static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 			break;
 		}
 
-		if (timeout_jiffies <= 0) {
+		if (timeout && time_after_eq(jiffies, timeout_expire)) {
 			ret = -ETIME;
 			break;
 		}
 
 		timer.function = NULL;
 		if (timeout || missed_irq(dev_priv, ring)) {
+			unsigned long expire;
+
 			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
-			expire = jiffies + (missed_irq(dev_priv, ring) ? 1: timeout_jiffies);
+			expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
 			mod_timer(&timer, expire);
 		}
 
 		io_schedule();
 
-		if (timeout)
-			timeout_jiffies = expire - jiffies;
-
 		if (timer.function) {
 			del_singleshot_timer_sync(&timer);
 			destroy_timer_on_stack(&timer);