diff mbox

drm/i915: Speed up DMC firmware loading

Message ID 20170901140117.22173-1-david.weinehall@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

David Weinehall Sept. 1, 2017, 2:01 p.m. UTC
Currently we're doing:

1. acquire lock
2. write word to hardware
3. release lock
4. repeat from 1

to load the DMC firmware. Due to the cost of acquiring/releasing a lock,
and the size of the DMC firmware, this slows down DMC loading a lot.

This patch simply acquires the lock, writes the entire firmware,
then releases the lock. Testing shows resume speedups
on the order of 10ms on platforms with DMC firmware (GEN9+).

Signed-off-by: David Weinehall <david.weinehall@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_csr.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

Comments

Chris Wilson Sept. 1, 2017, 2:10 p.m. UTC | #1
Quoting David Weinehall (2017-09-01 15:01:17)
> Currently we're doing:
> 
> 1. acquire lock
> 2. write word to hardware
> 3. release lock
> 4. repeat from 1
> 
> to load the DMC firmware. Due to the cost of acquiring/releasing a lock,
> and the size of the DMC firmware, this slows down DMC loading a lot.
> 
> This patch simply acquires the lock, writes the entire firmware,
> then releases the lock.  Testing shows resume speedups
> in the order of 10ms on platforms with DMC firmware (GEN9+).
> 
> Signed-off-by: David Weinehall <david.weinehall@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_csr.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
> index 965988f79a55..b7a6ef7e0d53 100644
> --- a/drivers/gpu/drm/i915/intel_csr.c
> +++ b/drivers/gpu/drm/i915/intel_csr.c
> @@ -239,7 +239,9 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv)
>  void intel_csr_load_program(struct drm_i915_private *dev_priv)
>  {
>         u32 *payload = dev_priv->csr.dmc_payload;
> +       enum forcewake_domains fw_domains;
>         uint32_t i, fw_size;
> +       unsigned long flags;
>  
>         if (!HAS_CSR(dev_priv)) {
>                 DRM_ERROR("No CSR support available for this platform\n");
> @@ -251,9 +253,20 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv)
>                 return;
>         }
>  
> +       fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
> +                                                   CSR_PROGRAM(0),
> +                                                   FW_REG_WRITE);
> +
>         fw_size = dev_priv->csr.dmc_fw_size;
> +       assert_rpm_wakelock_held(dev_priv);
> +       spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> +       intel_uncore_forcewake_get__locked(dev_priv, fw_domains);

One thing to note is that this platform doesn't have the concurrent mmio
bug, so we can allow others to run concurrently:

if (fw_domains)
	intel_uncore_forcewake_get(dev_priv, fw_domains);

And I'm pretty confident that fw_domains is 0 here.

> +
>         for (i = 0; i < fw_size; i++)
> -               I915_WRITE(CSR_PROGRAM(i), payload[i]);
> +               I915_WRITE_FW(CSR_PROGRAM(i), payload[i]);
> +
> +       intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
> +       spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);

if (fw_domains)
	intel_uncore_forcewake_put(dev_priv, fw_domains);

>  
>         for (i = 0; i < dev_priv->csr.mmio_count; i++) {
>                 I915_WRITE(dev_priv->csr.mmioaddr[i],
> -- 
> 2.14.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Ville Syrjälä Sept. 1, 2017, 2:18 p.m. UTC | #2
On Fri, Sep 01, 2017 at 03:10:58PM +0100, Chris Wilson wrote:
> Quoting David Weinehall (2017-09-01 15:01:17)
> > Currently we're doing:
> > 
> > 1. acquire lock
> > 2. write word to hardware
> > 3. release lock
> > 4. repeat from 1
> > 
> > to load the DMC firmware. Due to the cost of acquiring/releasing a lock,
> > and the size of the DMC firmware, this slows down DMC loading a lot.
> > 
> > This patch simply acquires the lock, writes the entire firmware,
> > then releases the lock.  Testing shows resume speedups
> > in the order of 10ms on platforms with DMC firmware (GEN9+).
> > 
> > Signed-off-by: David Weinehall <david.weinehall@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_csr.c | 15 ++++++++++++++-
> >  1 file changed, 14 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
> > index 965988f79a55..b7a6ef7e0d53 100644
> > --- a/drivers/gpu/drm/i915/intel_csr.c
> > +++ b/drivers/gpu/drm/i915/intel_csr.c
> > @@ -239,7 +239,9 @@ static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv)
> >  void intel_csr_load_program(struct drm_i915_private *dev_priv)
> >  {
> >         u32 *payload = dev_priv->csr.dmc_payload;
> > +       enum forcewake_domains fw_domains;
> >         uint32_t i, fw_size;
> > +       unsigned long flags;
> >  
> >         if (!HAS_CSR(dev_priv)) {
> >                 DRM_ERROR("No CSR support available for this platform\n");
> > @@ -251,9 +253,20 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv)
> >                 return;
> >         }
> >  
> > +       fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
> > +                                                   CSR_PROGRAM(0),
> > +                                                   FW_REG_WRITE);
> > +
> >         fw_size = dev_priv->csr.dmc_fw_size;
> > +       assert_rpm_wakelock_held(dev_priv);
> > +       spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> > +       intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
> 
> One thing to note is that this platform doesn't have the concurrent mmio
> bug, so we can allow others to run concurrently:
> 
> if (fw_domains)
> 	intel_uncore_forcewake_get(dev_priv, fw_domains);
> 
> And I'm pretty confident that fw_domains is 0 here.

Yes, CSR_PROGRAM is at 0x80000+ and all registers that need forcewake
are below 0x40000, so we can omit the fw dance here as well.

> 
> > +
> >         for (i = 0; i < fw_size; i++)
> > -               I915_WRITE(CSR_PROGRAM(i), payload[i]);
> > +               I915_WRITE_FW(CSR_PROGRAM(i), payload[i]);
> > +
> > +       intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
> > +       spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
> 
> if (fw_domains)
> 	intel_uncore_forcewake_put(dev_priv, fw_domains);
> 
> >  
> >         for (i = 0; i < dev_priv->csr.mmio_count; i++) {
> >                 I915_WRITE(dev_priv->csr.mmioaddr[i],
> > -- 
> > 2.14.1
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 965988f79a55..b7a6ef7e0d53 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -239,7 +239,9 @@  static void gen9_set_dc_state_debugmask(struct drm_i915_private *dev_priv)
 void intel_csr_load_program(struct drm_i915_private *dev_priv)
 {
 	u32 *payload = dev_priv->csr.dmc_payload;
+	enum forcewake_domains fw_domains;
 	uint32_t i, fw_size;
+	unsigned long flags;
 
 	if (!HAS_CSR(dev_priv)) {
 		DRM_ERROR("No CSR support available for this platform\n");
@@ -251,9 +253,20 @@  void intel_csr_load_program(struct drm_i915_private *dev_priv)
 		return;
 	}
 
+	fw_domains = intel_uncore_forcewake_for_reg(dev_priv,
+						    CSR_PROGRAM(0),
+						    FW_REG_WRITE);
+
 	fw_size = dev_priv->csr.dmc_fw_size;
+	assert_rpm_wakelock_held(dev_priv);
+	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
+	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
+
 	for (i = 0; i < fw_size; i++)
-		I915_WRITE(CSR_PROGRAM(i), payload[i]);
+		I915_WRITE_FW(CSR_PROGRAM(i), payload[i]);
+
+	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 
 	for (i = 0; i < dev_priv->csr.mmio_count; i++) {
 		I915_WRITE(dev_priv->csr.mmioaddr[i],