diff mbox

drm/i915: enable HiZ Raw Stall Optimization

Message ID 1390810716-13510-1-git-send-email-olvaffe@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Chia-I Wu Jan. 27, 2014, 8:18 a.m. UTC
From: Chia-I Wu <olv@lunarg.com>

The optimization is available on Ivy Bridge and later, and is disabled by
default.  Enabling it helps certain workloads such as the GLBenchmark TRex test.

Signed-off-by: Chia-I Wu <olv@lunarg.com>
Cc: Ian Romanick <ian.d.romanick@intel.com>
Cc: Chad Versace <chad.versace@linux.intel.com>

---
 drivers/gpu/drm/i915/i915_reg.h     | 2 ++
 drivers/gpu/drm/i915/i915_suspend.c | 9 +++++++--
 drivers/gpu/drm/i915/intel_pm.c     | 8 ++++++++
 3 files changed, 17 insertions(+), 2 deletions(-)

Comments

Chia-I Wu Jan. 27, 2014, 8:33 a.m. UTC | #1
[Additional comments, and copy Ian and Chad for real]

On Mon, Jan 27, 2014 at 4:18 PM, Chia-I Wu <olvaffe@gmail.com> wrote:
> From: Chia-I Wu <olv@lunarg.com>
>
> The optimization is available on Ivy Bridge and later, and is disabled by
> default.  Enabling it helps certain workloads such as GLBenchmark TRex test.
With the patch applied, GLB27_TRex_C24Z16_FixedTimeStep goes from
99fps to 109fps on my Haswell, and from 60fps to 65fps on my Ivy
Bridge.  No piglit regressions on either GEN.

I had a non-recoverable system hang once with the patch applied.  I
was not sure whether it was because of the patch or drm-intel-nightly
(which I checked out some weeks ago).  I did my tests today against latest
drm-intel-next, and did not have any hang.  Since the optimization is
disabled by default, I am curious whether there are any caveats before
enabling it.

>
> Signed-off-by: Chia-I Wu <olv@lunarg.com>
> Cc: Ian Romanick <ian.d.romanick@intel.com>
> Cc: Chad Versace <chad.versace@linux.intel.com>
>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     | 2 ++
>  drivers/gpu/drm/i915/i915_suspend.c | 9 +++++++--
>  drivers/gpu/drm/i915/intel_pm.c     | 8 ++++++++
>  3 files changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index ee27421..bd90ef3 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -930,6 +930,8 @@
>  #define   ECO_GATING_CX_ONLY   (1<<3)
>  #define   ECO_FLIP_DONE                (1<<0)
>
> +#define CACHE_MODE_0_GEN7      0x7000 /* IVB+ */
> +#define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
>  #define CACHE_MODE_1           0x7004 /* IVB+ */
>  #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1<<6)
>
> diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
> index 98790c7..13fefbd 100644
> --- a/drivers/gpu/drm/i915/i915_suspend.c
> +++ b/drivers/gpu/drm/i915/i915_suspend.c
> @@ -398,7 +398,9 @@ int i915_save_state(struct drm_device *dev)
>         intel_disable_gt_powersave(dev);
>
>         /* Cache mode state */
> -       if (INTEL_INFO(dev)->gen < 7)
> +       if (INTEL_INFO(dev)->gen >= 7)
> +               dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0_GEN7);
> +       else
>                 dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
>
>         /* Memory Arbitration state */
> @@ -448,7 +450,10 @@ int i915_restore_state(struct drm_device *dev)
>         }
>
>         /* Cache mode state */
> -       if (INTEL_INFO(dev)->gen < 7)
> +       if (INTEL_INFO(dev)->gen >= 7)
> +               I915_WRITE(CACHE_MODE_0_GEN7, dev_priv->regfile.saveCACHE_MODE_0 |
> +                          0xffff0000);
> +       else
>                 I915_WRITE(CACHE_MODE_0, dev_priv->regfile.saveCACHE_MODE_0 |
>                            0xffff0000);
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 26c29c1..d6ddc39 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5355,6 +5355,10 @@ static void haswell_init_clock_gating(struct drm_device *dev)
>         /* WaVSRefCountFullforceMissDisable:hsw */
>         gen7_setup_fixed_func_scheduler(dev_priv);
>
> +       /* enable HiZ Raw Stall Optimization */
> +       I915_WRITE(CACHE_MODE_0_GEN7,
> +                  _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
> +
>         /* WaDisable4x2SubspanOptimization:hsw */
>         I915_WRITE(CACHE_MODE_1,
>                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
> @@ -5445,6 +5449,10 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
>         /* WaVSRefCountFullforceMissDisable:ivb */
>         gen7_setup_fixed_func_scheduler(dev_priv);
>
> +       /* enable HiZ Raw Stall Optimization */
> +       I915_WRITE(CACHE_MODE_0_GEN7,
> +                  _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
> +
>         /* WaDisable4x2SubspanOptimization:ivb */
>         I915_WRITE(CACHE_MODE_1,
>                    _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
> --
> 1.8.5.3
>
Daniel Vetter Jan. 27, 2014, 9:04 a.m. UTC | #2
On Mon, Jan 27, 2014 at 9:33 AM, Chia-I Wu <olvaffe@gmail.com> wrote:
> [Additional comments, and copy Ian and Chad for real]
>
> On Mon, Jan 27, 2014 at 4:18 PM, Chia-I Wu <olvaffe@gmail.com> wrote:
>> From: Chia-I Wu <olv@lunarg.com>
>>
>> The optimization is available on Ivy Bridge and later, and is disabled by
>> default.  Enabling it helps certain workloads such as GLBenchmark TRex test.
> With the patch applied, GLB27_TRex_C24Z16_FixedTimeStep goes from
> 99fps to 109fps on my Haswell, and from 60fps to 65fps on my Ivy
> Bridge.  No piglit regression on both GENs.
>
> I had a non-recoverable system hang once with the patch applied.  I
> was not sure if it is because of the patch or drm-intel-nightly (which
> I checkout out some weeks ago).  I did my tests today against latest
> drm-intel-next, and did not have any hang.  Since the optimization is
> disabled by default, I am curious if there is any caveat before
> enabling it.

Chad's still missing ;-) Also, the hang might just be ppgtt fallout;
there are still a few regressions with that.
-Daniel
Ville Syrjälä Jan. 27, 2014, 1:07 p.m. UTC | #3
On Mon, Jan 27, 2014 at 04:18:36PM +0800, Chia-I Wu wrote:
> From: Chia-I Wu <olv@lunarg.com>
> 
> The optimization is available on Ivy Bridge and later, and is disabled by
> default.  Enabling it helps certain workloads such as GLBenchmark TRex test.

Actually BSpec even goes as far as saying that this optimization must
be enabled on HSW+.

So it seems you should enable it for BDW as well. I'm not sure about VLV.
The description of the bit says nothing about VLV, even though the
documented default value is specified to have it set for VLV as well. I
guess someone should just try it and see what happens.

Might make sense to split the patch into per-platform patches. That way
we could more easily revert e.g. just the IVB part if it causes problems.

> 
> Signed-off-by: Chia-I Wu <olv@lunarg.com>
> Cc: Ian Romanick <ian.d.romanick@intel.com>
> Cc: Chad Versace <chad.versace@linux.intel.com>
> 
> ---
>  drivers/gpu/drm/i915/i915_reg.h     | 2 ++
>  drivers/gpu/drm/i915/i915_suspend.c | 9 +++++++--
>  drivers/gpu/drm/i915/intel_pm.c     | 8 ++++++++
>  3 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index ee27421..bd90ef3 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -930,6 +930,8 @@
>  #define   ECO_GATING_CX_ONLY	(1<<3)
>  #define   ECO_FLIP_DONE		(1<<0)
>  
> +#define CACHE_MODE_0_GEN7	0x7000 /* IVB+ */
> +#define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
>  #define CACHE_MODE_1		0x7004 /* IVB+ */
>  #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1<<6)
>  
> diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
> index 98790c7..13fefbd 100644
> --- a/drivers/gpu/drm/i915/i915_suspend.c
> +++ b/drivers/gpu/drm/i915/i915_suspend.c
> @@ -398,7 +398,9 @@ int i915_save_state(struct drm_device *dev)
>  	intel_disable_gt_powersave(dev);
>  
>  	/* Cache mode state */
> -	if (INTEL_INFO(dev)->gen < 7)
> +	if (INTEL_INFO(dev)->gen >= 7)
> +		dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0_GEN7);
> +	else
>  		dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
>  
>  	/* Memory Arbitration state */
> @@ -448,7 +450,10 @@ int i915_restore_state(struct drm_device *dev)
>  	}
>  
>  	/* Cache mode state */
> -	if (INTEL_INFO(dev)->gen < 7)
> +	if (INTEL_INFO(dev)->gen >= 7)
> +		I915_WRITE(CACHE_MODE_0_GEN7, dev_priv->regfile.saveCACHE_MODE_0 |
> +			   0xffff0000);
> +	else
>  		I915_WRITE(CACHE_MODE_0, dev_priv->regfile.saveCACHE_MODE_0 |
>  			   0xffff0000);

These hunks are material for a separate patch.

>  
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 26c29c1..d6ddc39 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5355,6 +5355,10 @@ static void haswell_init_clock_gating(struct drm_device *dev)
>  	/* WaVSRefCountFullforceMissDisable:hsw */
>  	gen7_setup_fixed_func_scheduler(dev_priv);
>  
> +	/* enable HiZ Raw Stall Optimization */
> +	I915_WRITE(CACHE_MODE_0_GEN7,
> +		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
> +
>  	/* WaDisable4x2SubspanOptimization:hsw */
>  	I915_WRITE(CACHE_MODE_1,
>  		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
> @@ -5445,6 +5449,10 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
>  	/* WaVSRefCountFullforceMissDisable:ivb */
>  	gen7_setup_fixed_func_scheduler(dev_priv);
>  
> +	/* enable HiZ Raw Stall Optimization */
> +	I915_WRITE(CACHE_MODE_0_GEN7,
> +		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
> +
>  	/* WaDisable4x2SubspanOptimization:ivb */
>  	I915_WRITE(CACHE_MODE_1,
>  		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
> -- 
> 1.8.5.3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter Jan. 27, 2014, 4:22 p.m. UTC | #4
On Mon, Jan 27, 2014 at 03:07:45PM +0200, Ville Syrjälä wrote:
> On Mon, Jan 27, 2014 at 04:18:36PM +0800, Chia-I Wu wrote:
> > From: Chia-I Wu <olv@lunarg.com>
> > 
> > The optimization is available on Ivy Bridge and later, and is disabled by
> > default.  Enabling it helps certain workloads such as GLBenchmark TRex test.
> 
> Actually BSpec even goes as far as saying that this optimization must
> be enabled on HSW+.
> 
> So it seems you should enable it for BDW as well. I'm not sure about VLV.
> The description of the bit says nothing about VLV, even though the
> documented default value is specified to have it set for VLV as well. I
> guess someone should just try it and see what happens.
> 
> Might make sense to split the patch into per-platforms patches. That way
> we could more easily revert eg. just the IVB part if it causes problems.
> 
> > 
> > Signed-off-by: Chia-I Wu <olv@lunarg.com>
> > Cc: Ian Romanick <ian.d.romanick@intel.com>
> > Cc: Chad Versace <chad.versace@linux.intel.com>
> > 
> > ---
> >  drivers/gpu/drm/i915/i915_reg.h     | 2 ++
> >  drivers/gpu/drm/i915/i915_suspend.c | 9 +++++++--
> >  drivers/gpu/drm/i915/intel_pm.c     | 8 ++++++++
> >  3 files changed, 17 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index ee27421..bd90ef3 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -930,6 +930,8 @@
> >  #define   ECO_GATING_CX_ONLY	(1<<3)
> >  #define   ECO_FLIP_DONE		(1<<0)
> >  
> > +#define CACHE_MODE_0_GEN7	0x7000 /* IVB+ */
> > +#define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
> >  #define CACHE_MODE_1		0x7004 /* IVB+ */
> >  #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1<<6)
> >  
> > diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
> > index 98790c7..13fefbd 100644
> > --- a/drivers/gpu/drm/i915/i915_suspend.c
> > +++ b/drivers/gpu/drm/i915/i915_suspend.c
> > @@ -398,7 +398,9 @@ int i915_save_state(struct drm_device *dev)
> >  	intel_disable_gt_powersave(dev);
> >  
> >  	/* Cache mode state */
> > -	if (INTEL_INFO(dev)->gen < 7)
> > +	if (INTEL_INFO(dev)->gen >= 7)
> > +		dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0_GEN7);
> > +	else
> >  		dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
> >  
> >  	/* Memory Arbitration state */
> > @@ -448,7 +450,10 @@ int i915_restore_state(struct drm_device *dev)
> >  	}
> >  
> >  	/* Cache mode state */
> > -	if (INTEL_INFO(dev)->gen < 7)
> > +	if (INTEL_INFO(dev)->gen >= 7)
> > +		I915_WRITE(CACHE_MODE_0_GEN7, dev_priv->regfile.saveCACHE_MODE_0 |
> > +			   0xffff0000);
> > +	else
> >  		I915_WRITE(CACHE_MODE_0, dev_priv->regfile.saveCACHE_MODE_0 |
> >  			   0xffff0000);
> 
> These hunks are material for a separate patch.

Also, they shouldn't be required. On all modern platforms our setup code
(the init_clock_gating callback), which runs both at driver load time and
at resume time, should take care of all these bits and registers. If we miss
some of them, we need to add them. So please drop this hunk for v2.
-Daniel

> 
> >  
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index 26c29c1..d6ddc39 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -5355,6 +5355,10 @@ static void haswell_init_clock_gating(struct drm_device *dev)
> >  	/* WaVSRefCountFullforceMissDisable:hsw */
> >  	gen7_setup_fixed_func_scheduler(dev_priv);
> >  
> > +	/* enable HiZ Raw Stall Optimization */
> > +	I915_WRITE(CACHE_MODE_0_GEN7,
> > +		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
> > +
> >  	/* WaDisable4x2SubspanOptimization:hsw */
> >  	I915_WRITE(CACHE_MODE_1,
> >  		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
> > @@ -5445,6 +5449,10 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
> >  	/* WaVSRefCountFullforceMissDisable:ivb */
> >  	gen7_setup_fixed_func_scheduler(dev_priv);
> >  
> > +	/* enable HiZ Raw Stall Optimization */
> > +	I915_WRITE(CACHE_MODE_0_GEN7,
> > +		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
> > +
> >  	/* WaDisable4x2SubspanOptimization:ivb */
> >  	I915_WRITE(CACHE_MODE_1,
> >  		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
> > -- 
> > 1.8.5.3
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chia-I Wu Jan. 28, 2014, 4:40 a.m. UTC | #5
On Mon, Jan 27, 2014 at 9:07 PM, Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
> On Mon, Jan 27, 2014 at 04:18:36PM +0800, Chia-I Wu wrote:
>> From: Chia-I Wu <olv@lunarg.com>
>>
>> The optimization is available on Ivy Bridge and later, and is disabled by
>> default.  Enabling it helps certain workloads such as GLBenchmark TRex test.
>
> Actually BSpec even goes as far as saying that this optimization must
> be enabled on HSW+.
The public documentation actually says if you want the optimization,
you must enable it.  Kind of stating the obvious. :)

> So it seems you should enable it for BDW as well. I'm not sure about VLV.
> The description of the bit says nothing about VLV, even though the
> documented default value is specified to have it set for VLV as well. I
> guess someone should just try it and see what happens.
>
> Might make sense to split the patch into per-platforms patches. That way
> we could more easily revert eg. just the IVB part if it causes problems.
Will do.  Though I will leave BDW/VLV out as I do not have the hardware.

>>
>> Signed-off-by: Chia-I Wu <olv@lunarg.com>
>> Cc: Ian Romanick <ian.d.romanick@intel.com>
>> Cc: Chad Versace <chad.versace@linux.intel.com>
>>
>> ---
>>  drivers/gpu/drm/i915/i915_reg.h     | 2 ++
>>  drivers/gpu/drm/i915/i915_suspend.c | 9 +++++++--
>>  drivers/gpu/drm/i915/intel_pm.c     | 8 ++++++++
>>  3 files changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index ee27421..bd90ef3 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -930,6 +930,8 @@
>>  #define   ECO_GATING_CX_ONLY (1<<3)
>>  #define   ECO_FLIP_DONE              (1<<0)
>>
>> +#define CACHE_MODE_0_GEN7    0x7000 /* IVB+ */
>> +#define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
>>  #define CACHE_MODE_1         0x7004 /* IVB+ */
>>  #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1<<6)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
>> index 98790c7..13fefbd 100644
>> --- a/drivers/gpu/drm/i915/i915_suspend.c
>> +++ b/drivers/gpu/drm/i915/i915_suspend.c
>> @@ -398,7 +398,9 @@ int i915_save_state(struct drm_device *dev)
>>       intel_disable_gt_powersave(dev);
>>
>>       /* Cache mode state */
>> -     if (INTEL_INFO(dev)->gen < 7)
>> +     if (INTEL_INFO(dev)->gen >= 7)
>> +             dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0_GEN7);
>> +     else
>>               dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
>>
>>       /* Memory Arbitration state */
>> @@ -448,7 +450,10 @@ int i915_restore_state(struct drm_device *dev)
>>       }
>>
>>       /* Cache mode state */
>> -     if (INTEL_INFO(dev)->gen < 7)
>> +     if (INTEL_INFO(dev)->gen >= 7)
>> +             I915_WRITE(CACHE_MODE_0_GEN7, dev_priv->regfile.saveCACHE_MODE_0 |
>> +                        0xffff0000);
>> +     else
>>               I915_WRITE(CACHE_MODE_0, dev_priv->regfile.saveCACHE_MODE_0 |
>>                          0xffff0000);
>
> These hunks are material for a separate patch.
>
>>
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 26c29c1..d6ddc39 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -5355,6 +5355,10 @@ static void haswell_init_clock_gating(struct drm_device *dev)
>>       /* WaVSRefCountFullforceMissDisable:hsw */
>>       gen7_setup_fixed_func_scheduler(dev_priv);
>>
>> +     /* enable HiZ Raw Stall Optimization */
>> +     I915_WRITE(CACHE_MODE_0_GEN7,
>> +                _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
>> +
>>       /* WaDisable4x2SubspanOptimization:hsw */
>>       I915_WRITE(CACHE_MODE_1,
>>                  _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
>> @@ -5445,6 +5449,10 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
>>       /* WaVSRefCountFullforceMissDisable:ivb */
>>       gen7_setup_fixed_func_scheduler(dev_priv);
>>
>> +     /* enable HiZ Raw Stall Optimization */
>> +     I915_WRITE(CACHE_MODE_0_GEN7,
>> +                _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
>> +
>>       /* WaDisable4x2SubspanOptimization:ivb */
>>       I915_WRITE(CACHE_MODE_1,
>>                  _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
>> --
>> 1.8.5.3
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> --
> Ville Syrjälä
> Intel OTC
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ee27421..bd90ef3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -930,6 +930,8 @@ 
 #define   ECO_GATING_CX_ONLY	(1<<3)
 #define   ECO_FLIP_DONE		(1<<0)
 
+#define CACHE_MODE_0_GEN7	0x7000 /* IVB+ */
+#define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
 #define CACHE_MODE_1		0x7004 /* IVB+ */
 #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1<<6)
 
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 98790c7..13fefbd 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -398,7 +398,9 @@  int i915_save_state(struct drm_device *dev)
 	intel_disable_gt_powersave(dev);
 
 	/* Cache mode state */
-	if (INTEL_INFO(dev)->gen < 7)
+	if (INTEL_INFO(dev)->gen >= 7)
+		dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0_GEN7);
+	else
 		dev_priv->regfile.saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
 
 	/* Memory Arbitration state */
@@ -448,7 +450,10 @@  int i915_restore_state(struct drm_device *dev)
 	}
 
 	/* Cache mode state */
-	if (INTEL_INFO(dev)->gen < 7)
+	if (INTEL_INFO(dev)->gen >= 7)
+		I915_WRITE(CACHE_MODE_0_GEN7, dev_priv->regfile.saveCACHE_MODE_0 |
+			   0xffff0000);
+	else
 		I915_WRITE(CACHE_MODE_0, dev_priv->regfile.saveCACHE_MODE_0 |
 			   0xffff0000);
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 26c29c1..d6ddc39 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -5355,6 +5355,10 @@  static void haswell_init_clock_gating(struct drm_device *dev)
 	/* WaVSRefCountFullforceMissDisable:hsw */
 	gen7_setup_fixed_func_scheduler(dev_priv);
 
+	/* enable HiZ Raw Stall Optimization */
+	I915_WRITE(CACHE_MODE_0_GEN7,
+		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
+
 	/* WaDisable4x2SubspanOptimization:hsw */
 	I915_WRITE(CACHE_MODE_1,
 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
@@ -5445,6 +5449,10 @@  static void ivybridge_init_clock_gating(struct drm_device *dev)
 	/* WaVSRefCountFullforceMissDisable:ivb */
 	gen7_setup_fixed_func_scheduler(dev_priv);
 
+	/* enable HiZ Raw Stall Optimization */
+	I915_WRITE(CACHE_MODE_0_GEN7,
+		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
+
 	/* WaDisable4x2SubspanOptimization:ivb */
 	I915_WRITE(CACHE_MODE_1,
 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));