diff mbox

[03/18] drm/i915: "Race-to-idle" after switching to the kernel context

Message ID 20180525093206.1919-4-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 25, 2018, 9:31 a.m. UTC
During suspend we want to flush out all active contexts and their
rendering. To do so we queue a request from the kernel's context, once
we know that request is done, we know the GPU is completely idle. To
speed up that switch bump the GPU clocks.

Switching to the kernel context prior to idling is also used to enforce
a barrier before changing OA properties, and when evicting active
rendering from the global GTT. All cases where we do want to
race-to-idle.

v2: Limit the boosting to only the switch before suspend.
v3: Limit it to the wait-for-idle on suspend.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: David Weinehall <david.weinehall@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> #v1
Tested-by: David Weinehall <david.weinehall@linux.intel.com> #v1
---
 drivers/gpu/drm/i915/i915_gem.c     | 27 +++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_request.h |  1 +
 2 files changed, 26 insertions(+), 2 deletions(-)

Comments

Mika Kuoppala May 25, 2018, 12:22 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> During suspend we want to flush out all active contexts and their
> rendering. To do so we queue a request from the kernel's context, once
> we know that request is done, we know the GPU is completely idle. To
> speed up that switch bump the GPU clocks.
>
> Switching to the kernel context prior to idling is also used to enforce
> a barrier before changing OA properties, and when evicting active
> rendering from the global GTT. All cases where we do want to
> race-to-idle.
>
> v2: Limit the boosting to only the switch before suspend.
> v3: Limit it to the wait-for-idle on suspend.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: David Weinehall <david.weinehall@linux.intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> #v1
> Tested-by: David Weinehall <david.weinehall@linux.intel.com> #v1
> ---
>  drivers/gpu/drm/i915/i915_gem.c     | 27 +++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_request.h |  1 +
>  2 files changed, 26 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c93f5dcb1d82..7b5544efa0ba 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3704,7 +3704,29 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  
>  static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags)
>  {
> -	return i915_gem_active_wait(&tl->last_request, flags);
> +	struct i915_request *rq;
> +	long ret;
> +
> +	rq = i915_gem_active_get_unlocked(&tl->last_request);
> +	if (!rq)
> +		return 0;
> +
> +	/*
> +	 * "Race-to-idle".
> +	 *
> +	 * Switching to the kernel context is often used as a synchronous
> +	 * step prior to idling, e.g. in suspend for flushing all
> +	 * current operations to memory before sleeping. These we
> +	 * want to complete as quickly as possible to avoid prolonged
> +	 * stalls, so allow the gpu to boost to maximum clocks.
> +	 */
> +	if (flags & I915_WAIT_FOR_IDLE_BOOST)
> +		gen6_rps_boost(rq, NULL);
> +
> +	ret = i915_request_wait(rq, flags, MAX_SCHEDULE_TIMEOUT);

I pondered about the boost flag falling through into wait.
But they are flags on wait domain so no reason to split/limit.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>

> +	i915_request_put(rq);
> +
> +	return ret < 0 ? ret : 0;
>  }
>  
>  static int wait_for_engines(struct drm_i915_private *i915)
> @@ -4979,7 +5001,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
>  
>  		ret = i915_gem_wait_for_idle(dev_priv,
>  					     I915_WAIT_INTERRUPTIBLE |
> -					     I915_WAIT_LOCKED);
> +					     I915_WAIT_LOCKED |
> +					     I915_WAIT_FOR_IDLE_BOOST);
>  		if (ret && ret != -EIO)
>  			goto err_unlock;
>  
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 1bbbb7a9fa03..491ff81d0fea 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -267,6 +267,7 @@ long i915_request_wait(struct i915_request *rq,
>  #define I915_WAIT_INTERRUPTIBLE	BIT(0)
>  #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
>  #define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
> +#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
>  
>  static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
>  
> -- 
> 2.17.0
Chris Wilson May 25, 2018, 12:26 p.m. UTC | #2
Quoting Mika Kuoppala (2018-05-25 13:22:55)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > During suspend we want to flush out all active contexts and their
> > rendering. To do so we queue a request from the kernel's context, once
> > we know that request is done, we know the GPU is completely idle. To
> > speed up that switch bump the GPU clocks.
> >
> > Switching to the kernel context prior to idling is also used to enforce
> > a barrier before changing OA properties, and when evicting active
> > rendering from the global GTT. All cases where we do want to
> > race-to-idle.
> >
> > v2: Limit the boosting to only the switch before suspend.
> > v3: Limit it to the wait-for-idle on suspend.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: David Weinehall <david.weinehall@linux.intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> #v1
> > Tested-by: David Weinehall <david.weinehall@linux.intel.com> #v1
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c     | 27 +++++++++++++++++++++++++--
> >  drivers/gpu/drm/i915/i915_request.h |  1 +
> >  2 files changed, 26 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index c93f5dcb1d82..7b5544efa0ba 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3704,7 +3704,29 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> >  
> >  static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags)
> >  {
> > -     return i915_gem_active_wait(&tl->last_request, flags);
> > +     struct i915_request *rq;
> > +     long ret;
> > +
> > +     rq = i915_gem_active_get_unlocked(&tl->last_request);
> > +     if (!rq)
> > +             return 0;
> > +
> > +     /*
> > +      * "Race-to-idle".
> > +      *
> > +      * Switching to the kernel context is often used as a synchronous
> > +      * step prior to idling, e.g. in suspend for flushing all
> > +      * current operations to memory before sleeping. These we
> > +      * want to complete as quickly as possible to avoid prolonged
> > +      * stalls, so allow the gpu to boost to maximum clocks.
> > +      */
> > +     if (flags & I915_WAIT_FOR_IDLE_BOOST)
> > +             gen6_rps_boost(rq, NULL);
> > +
> > +     ret = i915_request_wait(rq, flags, MAX_SCHEDULE_TIMEOUT);
> 
> I pondered about the boost flag falling through into wait.
> But they are flags on wait domain so no reason to split/limit.

Indeed,

> >  #define I915_WAIT_INTERRUPTIBLE      BIT(0)
> >  #define I915_WAIT_LOCKED     BIT(1) /* struct_mutex held, handle GPU reset */
> >  #define I915_WAIT_ALL                BIT(2) /* used by i915_gem_object_wait() */
> > +#define I915_WAIT_FOR_IDLE_BOOST BIT(3)

I thought it was a reasonable way to share the same set of flags with
distinct regions.

Thanks for the review and discussion,
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c93f5dcb1d82..7b5544efa0ba 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3704,7 +3704,29 @@  i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 
 static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags)
 {
-	return i915_gem_active_wait(&tl->last_request, flags);
+	struct i915_request *rq;
+	long ret;
+
+	rq = i915_gem_active_get_unlocked(&tl->last_request);
+	if (!rq)
+		return 0;
+
+	/*
+	 * "Race-to-idle".
+	 *
+	 * Switching to the kernel context is often used as a synchronous
+	 * step prior to idling, e.g. in suspend for flushing all
+	 * current operations to memory before sleeping. These we
+	 * want to complete as quickly as possible to avoid prolonged
+	 * stalls, so allow the gpu to boost to maximum clocks.
+	 */
+	if (flags & I915_WAIT_FOR_IDLE_BOOST)
+		gen6_rps_boost(rq, NULL);
+
+	ret = i915_request_wait(rq, flags, MAX_SCHEDULE_TIMEOUT);
+	i915_request_put(rq);
+
+	return ret < 0 ? ret : 0;
 }
 
 static int wait_for_engines(struct drm_i915_private *i915)
@@ -4979,7 +5001,8 @@  int i915_gem_suspend(struct drm_i915_private *dev_priv)
 
 		ret = i915_gem_wait_for_idle(dev_priv,
 					     I915_WAIT_INTERRUPTIBLE |
-					     I915_WAIT_LOCKED);
+					     I915_WAIT_LOCKED |
+					     I915_WAIT_FOR_IDLE_BOOST);
 		if (ret && ret != -EIO)
 			goto err_unlock;
 
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 1bbbb7a9fa03..491ff81d0fea 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -267,6 +267,7 @@  long i915_request_wait(struct i915_request *rq,
 #define I915_WAIT_INTERRUPTIBLE	BIT(0)
 #define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
 #define I915_WAIT_ALL		BIT(2) /* used by i915_gem_object_wait() */
+#define I915_WAIT_FOR_IDLE_BOOST BIT(3)
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);