diff mbox

[2/3] drm/i915: put ring frequency and turbo setup into a work queue v2

Message ID 1351271318-3148-2-git-send-email-jbarnes@virtuousgeek.org (mailing list archive)
State New, archived
Headers show

Commit Message

Jesse Barnes Oct. 26, 2012, 5:08 p.m. UTC
Communicating via the mailbox registers with the PCU can take quite
awhile.  And updating the ring frequency or enabling turbo is not
something that needs to happen synchronously, so take it out of our init
and resume paths to speed things up (~200ms on my T420).

v2: add comment about why we use a work queue (Daniel)
    make sure work queue is idle on suspend (Daniel)
    use a delayed work queue since there's no hurry (Daniel)

References: https://bugs.freedesktop.org/show_bug.cgi?id=54089

Signed-of-by: Jesse Barnes <jbarnes@virtuougseek.org>
---
 drivers/gpu/drm/i915/i915_dma.c |    1 +
 drivers/gpu/drm/i915/i915_drv.c |    2 ++
 drivers/gpu/drm/i915/i915_drv.h |    3 +++
 drivers/gpu/drm/i915/intel_pm.c |   32 ++++++++++++++++++++++++++++++--
 4 files changed, 36 insertions(+), 2 deletions(-)

Comments

Rodrigo Vivi Oct. 30, 2012, 4:53 p.m. UTC | #1
Reviewed and tested on My SNB fixing bug:
https://bugs.freedesktop.org/show_bug.cgi?id=54089

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
Tested-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>

On Fri, Oct 26, 2012 at 3:08 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> Communicating via the mailbox registers with the PCU can take quite
> awhile.  And updating the ring frequency or enabling turbo is not
> something that needs to happen synchronously, so take it out of our init
> and resume paths to speed things up (~200ms on my T420).
>
> v2: add comment about why we use a work queue (Daniel)
>     make sure work queue is idle on suspend (Daniel)
>     use a delayed work queue since there's no hurry (Daniel)
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=54089
>
> Signed-of-by: Jesse Barnes <jbarnes@virtuougseek.org>
> ---
>  drivers/gpu/drm/i915/i915_dma.c |    1 +
>  drivers/gpu/drm/i915/i915_drv.c |    2 ++
>  drivers/gpu/drm/i915/i915_drv.h |    3 +++
>  drivers/gpu/drm/i915/intel_pm.c |   32 ++++++++++++++++++++++++++++++--
>  4 files changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 14526dc..b5977b4 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1725,6 +1725,7 @@ int i915_driver_unload(struct drm_device *dev)
>         if (drm_core_check_feature(dev, DRIVER_MODESET)) {
>                 intel_fbdev_fini(dev);
>                 intel_modeset_cleanup(dev);
> +               intel_gt_cleanup(dev);
>                 cancel_work_sync(&dev_priv->console_resume_work);
>
>                 /*
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 6d1fb51..4d858a9 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -470,6 +470,8 @@ static int i915_drm_freeze(struct drm_device *dev)
>                         return error;
>                 }
>
> +               intel_gt_cleanup(dev);
> +
>                 intel_modeset_disable(dev);
>
>                 drm_irq_uninstall(dev);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a5b0456..1e84a59 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -871,6 +871,8 @@ typedef struct drm_i915_private {
>                 int r_t;
>         } ips;
>
> +       struct delayed_work gen6_power_work;
> +
>         enum no_fbc_reason no_fbc_reason;
>
>         struct drm_mm_node *compressed_fb;
> @@ -1268,6 +1270,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged);
>  extern void intel_irq_init(struct drm_device *dev);
>  extern void intel_gt_init(struct drm_device *dev);
>  extern void intel_gt_reset(struct drm_device *dev);
> +extern void intel_gt_cleanup(struct drm_device *dev);
>
>  void i915_error_state_free(struct kref *error_ref);
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 59068be..025abbf 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3312,15 +3312,34 @@ void intel_disable_gt_powersave(struct drm_device *dev)
>         }
>  }
>
> +static void intel_gen6_powersave_work(struct work_struct *work)
> +{
> +       struct drm_i915_private *dev_priv =
> +               container_of(work, struct drm_i915_private,
> +                            gen6_power_work.work);
> +       struct drm_device *dev = dev_priv->dev;
> +
> +       mutex_lock(&dev->struct_mutex);
> +       gen6_enable_rps(dev);
> +       gen6_update_ring_freq(dev);
> +       mutex_unlock(&dev->struct_mutex);
> +}
> +
>  void intel_enable_gt_powersave(struct drm_device *dev)
>  {
> +       struct drm_i915_private *dev_priv = dev->dev_private;
> +
>         if (IS_IRONLAKE_M(dev)) {
>                 ironlake_enable_drps(dev);
>                 ironlake_enable_rc6(dev);
>                 intel_init_emon(dev);
>         } else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) {
> -               gen6_enable_rps(dev);
> -               gen6_update_ring_freq(dev);
> +               /*
> +                * PCU communication is slow and this doesn't need to be
> +                * done at any specific time, so do this out of our fast path
> +                * to make resume and init faster.
> +                */
> +               schedule_delayed_work(&dev_priv->gen6_power_work, HZ);
>         }
>  }
>
> @@ -4175,6 +4194,15 @@ void intel_gt_init(struct drm_device *dev)
>                 dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
>                 dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
>         }
> +       INIT_DELAYED_WORK(&dev_priv->gen6_power_work,
> +                         intel_gen6_powersave_work);
> +}
> +
> +void intel_gt_cleanup(struct drm_device *dev)
> +{
> +       struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +       cancel_delayed_work_sync(&dev_priv->gen6_power_work);
>  }
>
>  int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
> --
> 1.7.9.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Jesse Barnes Oct. 30, 2012, 4:58 p.m. UTC | #2
On Tue, 30 Oct 2012 14:53:15 -0200
Rodrigo Vivi <rodrigo.vivi@gmail.com> wrote:

> Reviewed and tested on My SNB fixing bug:
> https://bugs.freedesktop.org/show_bug.cgi?id=54089
> 
> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
> Tested-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>

Cool, thanks for testing.  It really fixed that bug?  I had my
doubts. :)
Rodrigo Vivi Oct. 30, 2012, 5:03 p.m. UTC | #3
I was here trying to cheking init values and change forcewake set
during init, etc and nothing was getting RC6 running after resume
besides that msleep(50) workaround after setting CACHE_MODE_0...

Now with your patch my RC6 is back to life after resume: RC6 35.8% :D

Well, but when looking at BSPec I noticed that CACHE_MODE_0 is 0x02120
only for SNB. For IVB+ is 0x7004 what is defined as CACHE_MODE_1 in
our code.
I'm afraid we are doing something wrong here.... or am I missing something?

On Tue, Oct 30, 2012 at 2:58 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> On Tue, 30 Oct 2012 14:53:15 -0200
> Rodrigo Vivi <rodrigo.vivi@gmail.com> wrote:
>
>> Reviewed and tested on My SNB fixing bug:
>> https://bugs.freedesktop.org/show_bug.cgi?id=54089
>>
>> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
>> Tested-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
>
> Cool, thanks for testing.  It really fixed that bug?  I had my
> doubts. :)
>
> --
> Jesse Barnes, Intel Open Source Technology Center
Daniel Vetter Oct. 30, 2012, 5:09 p.m. UTC | #4
On Tue, Oct 30, 2012 at 6:03 PM, Rodrigo Vivi <rodrigo.vivi@gmail.com> wrote:
> I was here trying to cheking init values and change forcewake set
> during init, etc and nothing was getting RC6 running after resume
> besides that msleep(50) workaround after setting CACHE_MODE_0...
>
> Now with your patch my RC6 is back to life after resume: RC6 35.8% :D
>
> Well, but when looking at BSPec I noticed that CACHE_MODE_0 is 0x02120
> only for SNB. For IVB+ is 0x7004 what is defined as CACHE_MODE_1 in
> our code.
> I'm afraid we are doing something wrong here.... or am I missing something?

This is the legacy register save/restore code for resume, which should
be burnt. On a big pyre. With gasoline. Really.

Cheers, Daniel

PS: Unfortunately it's a giant mess to review all the code paths and
make sure that we can disable a given register restore block for kms.
I've started, but it's slow-going (see the "dont save/restore * regs
for kms" patch series which is already merged). Patches _highly_
welcome.
Daniel Vetter Oct. 30, 2012, 5:17 p.m. UTC | #5
On Fri, Oct 26, 2012 at 7:08 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> Communicating via the mailbox registers with the PCU can take quite
> awhile.  And updating the ring frequency or enabling turbo is not
> something that needs to happen synchronously, so take it out of our init
> and resume paths to speed things up (~200ms on my T420).
>
> v2: add comment about why we use a work queue (Daniel)
>     make sure work queue is idle on suspend (Daniel)
>     use a delayed work queue since there's no hurry (Daniel)
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=54089
>
> Signed-of-by: Jesse Barnes <jbarnes@virtuougseek.org>

Unfortunately I have to tear this patch apart ;-)

- The cleanup sequence is inconsistent afaict: Check the ordering of
intel_gt_cleanup vs. intel_disable_gt_powersave

- cancel_*_sync is on a level with *_lock in it's ability to hang
machines in a deadlock. Hiding it in an innocent sounding functions
like gt_cleanup (which _only_ cancels the work item) is not a good
idea. Also, I'd really prefer more symmetric in our setup/teardown
code, so if that cancel_work can't be in in disable_gt_powersave, we
need a good reason for it. I know, lock inversion, but my next point
will solve that ;-)

- you still grab the dev->struct_mutex lock, which means you've only
moved that 200ms delay to someplace you don't measure it. Most likely
it'll be right when userspace wants to do a nice 15 frames fade-in for
the unlock screen, which we'll completely drop on the floor due to
hogging the execbuf lock for an eternity.

Iow we need to add a new dev_priv->rps.lock mutex in a first patch,
then move things around like you do here. That should also take care
of any deadlock problems with the work item itself, so you can move
the cancel_work into disable_gt_powersave right before we shut down
rps/rc6.

Cheers, Daniel
Jesse Barnes Oct. 31, 2012, 8:44 p.m. UTC | #6
On Tue, 30 Oct 2012 18:17:58 +0100
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Fri, Oct 26, 2012 at 7:08 PM, Jesse Barnes <jbarnes@virtuousgeek.org> wrote:
> > Communicating via the mailbox registers with the PCU can take quite
> > awhile.  And updating the ring frequency or enabling turbo is not
> > something that needs to happen synchronously, so take it out of our init
> > and resume paths to speed things up (~200ms on my T420).
> >
> > v2: add comment about why we use a work queue (Daniel)
> >     make sure work queue is idle on suspend (Daniel)
> >     use a delayed work queue since there's no hurry (Daniel)
> >
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=54089
> >
> > Signed-of-by: Jesse Barnes <jbarnes@virtuougseek.org>
> 
> Unfortunately I have to tear this patch apart ;-)
> 
> - The cleanup sequence is inconsistent afaict: Check the ordering of
> intel_gt_cleanup vs. intel_disable_gt_powersave

Ok will fix.

> - cancel_*_sync is on a level with *_lock in it's ability to hang
> machines in a deadlock. Hiding it in an innocent sounding functions
> like gt_cleanup (which _only_ cancels the work item) is not a good
> idea. Also, I'd really prefer more symmetric in our setup/teardown
> code, so if that cancel_work can't be in in disable_gt_powersave, we
> need a good reason for it. I know, lock inversion, but my next point
> will solve that ;-)

Note we have timeouts and such in the cancellation path, so I don't
think there's any way it could hang?

> - you still grab the dev->struct_mutex lock, which means you've only
> moved that 200ms delay to someplace you don't measure it. Most likely
> it'll be right when userspace wants to do a nice 15 frames fade-in for
> the unlock screen, which we'll completely drop on the floor due to
> hogging the execbuf lock for an eternity.

Oh it'll probably happen around VT switch time while we do the silly
dance (that's another thing I'd like to remove).

> Iow we need to add a new dev_priv->rps.lock mutex in a first patch,
> then move things around like you do here. That should also take care
> of any deadlock problems with the work item itself, so you can move
> the cancel_work into disable_gt_powersave right before we shut down
> rps/rc6.

Yeah that would be nicer.  There's really no need to block anyone on
the ring freq and RPS stuff; they can happen totally asynchronously (at
least I'm pretty sure of this, we do have bugs that indicate RPS vs RC6
may be an issue we need to handle, but that's neither here nor there).
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 14526dc..b5977b4 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1725,6 +1725,7 @@  int i915_driver_unload(struct drm_device *dev)
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		intel_fbdev_fini(dev);
 		intel_modeset_cleanup(dev);
+		intel_gt_cleanup(dev);
 		cancel_work_sync(&dev_priv->console_resume_work);
 
 		/*
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6d1fb51..4d858a9 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -470,6 +470,8 @@  static int i915_drm_freeze(struct drm_device *dev)
 			return error;
 		}
 
+		intel_gt_cleanup(dev);
+
 		intel_modeset_disable(dev);
 
 		drm_irq_uninstall(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a5b0456..1e84a59 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -871,6 +871,8 @@  typedef struct drm_i915_private {
 		int r_t;
 	} ips;
 
+	struct delayed_work gen6_power_work;
+
 	enum no_fbc_reason no_fbc_reason;
 
 	struct drm_mm_node *compressed_fb;
@@ -1268,6 +1270,7 @@  void i915_handle_error(struct drm_device *dev, bool wedged);
 extern void intel_irq_init(struct drm_device *dev);
 extern void intel_gt_init(struct drm_device *dev);
 extern void intel_gt_reset(struct drm_device *dev);
+extern void intel_gt_cleanup(struct drm_device *dev);
 
 void i915_error_state_free(struct kref *error_ref);
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 59068be..025abbf 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3312,15 +3312,34 @@  void intel_disable_gt_powersave(struct drm_device *dev)
 	}
 }
 
+static void intel_gen6_powersave_work(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, struct drm_i915_private,
+			     gen6_power_work.work);
+	struct drm_device *dev = dev_priv->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	gen6_enable_rps(dev);
+	gen6_update_ring_freq(dev);
+	mutex_unlock(&dev->struct_mutex);
+}
+
 void intel_enable_gt_powersave(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
 	if (IS_IRONLAKE_M(dev)) {
 		ironlake_enable_drps(dev);
 		ironlake_enable_rc6(dev);
 		intel_init_emon(dev);
 	} else if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) {
-		gen6_enable_rps(dev);
-		gen6_update_ring_freq(dev);
+		/*
+		 * PCU communication is slow and this doesn't need to be
+		 * done at any specific time, so do this out of our fast path
+		 * to make resume and init faster.
+		 */
+		schedule_delayed_work(&dev_priv->gen6_power_work, HZ);
 	}
 }
 
@@ -4175,6 +4194,15 @@  void intel_gt_init(struct drm_device *dev)
 		dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
 		dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
 	}
+	INIT_DELAYED_WORK(&dev_priv->gen6_power_work,
+			  intel_gen6_powersave_work);
+}
+
+void intel_gt_cleanup(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	cancel_delayed_work_sync(&dev_priv->gen6_power_work);
 }
 
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)