diff mbox

drm/i915: RMW register cycles considered evil

Message ID 1436186522-7834-1-git-send-email-daniel.vetter@ffwll.ch (mailing list archive)
State New, archived
Headers show

Commit Message

Daniel Vetter July 6, 2015, 12:42 p.m. UTC
Especially for workarounds which is stuff that's almost impossible to
verify: The initial state from the firmware on boot-up and after
resume could be different, which will hide bugs when we do an RMW
cycle.

Hence never do them, and if it's required we need a special mask.

Cc: Damien Lespiau <damien.lespiau@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Nick Hoath <nicholas.hoath@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

Comments

Lespiau, Damien July 6, 2015, 12:46 p.m. UTC | #1
On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
> Especially for workarounds which is stuff that's almost impossible to
> verify: The initial state from the firmware on boot-up and after
> resume could be different, which will hide bugs when we do an RMW
> cycle.
> 
> Hence never do them, and if it's required we need a special mask.
> 
> Cc: Damien Lespiau <damien.lespiau@intel.com>
> Cc: Imre Deak <imre.deak@intel.com>
> Cc: Nick Hoath <nicholas.hoath@intel.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>

Eeek. Let's take the problem the other way around: have you verified
it's OK to zero all those other fields?
Ville Syrjälä July 6, 2015, 12:50 p.m. UTC | #2
On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
> Especially for workarounds which is stuff that's almost impossible to
> verify: The initial state from the firmware on boot-up and after
> resume could be different, which will hide bugs when we do an RMW
> cycle.

If you're really worried about that then we should then explicitly
initialize all the registers that might affect stuff.

For a bunch of GT registers we could just do a GPU reset at driver
init. That that won't help with UCGCTL and such.

I'm also worried that if we don't use RMWs for early parts, the hardware
folks may still change the default for some ofhte other bits, and then
we end up clobbering those.

> 
> Hence never do them, and if it's required we need a special mask.
> 
> Cc: Damien Lespiau <damien.lespiau@intel.com>
> Cc: Imre Deak <imre.deak@intel.com>
> Cc: Nick Hoath <nicholas.hoath@intel.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 166ae51f5a5b..565f78d6a21d 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -57,7 +57,7 @@ static void gen9_init_clock_gating(struct drm_device *dev)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
>  	/* WaEnableLbsSlaRetryTimerDecrement:skl */
> -	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
> +	I915_WRITE(BDW_SCRATCH1,
>  		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
>  }
>  
> @@ -72,18 +72,18 @@ static void skl_init_clock_gating(struct drm_device *dev)
>  		 * WaDisableSDEUnitClockGating:skl
>  		 * WaSetGAPSunitClckGateDisable:skl
>  		 */
> -		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
> +		I915_WRITE(GEN8_UCGCTL6,
>  			   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |
>  			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
>  
>  		/* WaDisableVFUnitClockGating:skl */
> -		I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) |
> +		I915_WRITE(GEN6_UCGCTL2,
>  			   GEN6_VFUNIT_CLOCK_GATE_DISABLE);
>  	}
>  
>  	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
>  		/* WaDisableHDCInvalidation:skl */
> -		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
> +		I915_WRITE(GAM_ECOCHK,
>  			   BDW_DISABLE_HDC_INVALIDATION);
>  
>  		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
> @@ -93,7 +93,7 @@ static void skl_init_clock_gating(struct drm_device *dev)
>  
>  	if (INTEL_REVID(dev) <= SKL_REVID_E0)
>  		/* WaDisableLSQCROPERFforOCL:skl */
> -		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
> +		I915_WRITE(GEN8_L3SQCREG4,
>  			   GEN8_LQSC_RO_PERF_DIS);
>  }
>  
> @@ -109,12 +109,12 @@ static void bxt_init_clock_gating(struct drm_device *dev)
>  	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
>  	 */
>  	 /* WaDisableSDEUnitClockGating:bxt */
> -	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
> +	I915_WRITE(GEN8_UCGCTL6,
>  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
>  		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
>  
>  	/* FIXME: apply on A0 only */
> -	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
> +	I915_WRITE(TILECTL, TILECTL_TLBPF);
>  }
>  
>  static void i915_pineview_get_mem_freq(struct drm_device *dev)
> -- 
> 2.1.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Dave Gordon July 6, 2015, 2:07 p.m. UTC | #3
On 06/07/15 13:50, Ville Syrjälä wrote:
> On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
>> Especially for workarounds which is stuff that's almost impossible to
>> verify: The initial state from the firmware on boot-up and after
>> resume could be different, which will hide bugs when we do an RMW
>> cycle.
>
> If you're really worried about that then we should then explicitly
> initialize all the registers that might affect stuff.
>
> For a bunch of GT registers we could just do a GPU reset at driver
> init. That that won't help with UCGCTL and such.
>
> I'm also worried that if we don't use RMWs for early parts, the hardware
> folks may still change the default for some ofhte other bits, and then
> we end up clobbering those.

In other drivers, I've found a good pattern to be:

1.	during driver load, snapshot (just once) anything that the BIOS
	may have programmed that we may need later
2.	then reset the h/w and reprogram it totally to our preferred
	values, which may to a greater or lesser degree be derived from
	the saved BIOS settings
3.	during unload, reset the h/w again and reprogram it with the
	BIOS settings

2a.	resume is just like load, except we don't need or want to
	capture the BIOS settings first
3a.	suspend is like unload, except in some cases the BIOS values
	might need to be tweaked when writing them back in order to
	ensure the device doesn't generate further activity.

Step 1 ensures that we don't lose useful settings where the BIOS knows 
better than the driver what the good values are.
Step 2/2a. ensures that the device starts out in a well-defined state, 
regardless of how comprehensively (or badly) the BIOS has set it up.
It seems a reasonable compromise between taking advantage of good BIOSes 
while not getting too much hassle from broken ones.

.Dave.
Daniel Vetter July 6, 2015, 2:58 p.m. UTC | #4
On Mon, Jul 06, 2015 at 01:46:19PM +0100, Damien Lespiau wrote:
> On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
> > Especially for workarounds which is stuff that's almost impossible to
> > verify: The initial state from the firmware on boot-up and after
> > resume could be different, which will hide bugs when we do an RMW
> > cycle.
> > 
> > Hence never do them, and if it's required we need a special mask.
> > 
> > Cc: Damien Lespiau <damien.lespiau@intel.com>
> > Cc: Imre Deak <imre.deak@intel.com>
> > Cc: Nick Hoath <nicholas.hoath@intel.com>
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> 
> Eeek. Let's take the problem the other way around: have you verified
> it's OK to zero all those other fields?

Nope, but it's what we do for other workarounds (e.g. the ones we load
through the rings except for one case in the cxt switch wa) and on other
platforms. And in general we've moved away from RMW wherever we can since
it had too much surprises.

It's really just something I spotted while stumbling over a w/a patch for
hsw that we never merged - I don't like the inconsistency. And it has
bitten us in the past.

And yes I haven't done the audit here, but the fact that you suggest we
need one kind proves my point ;-)
-Daniel

> 
> -- 
> Damien
> 
> > ---
> >  drivers/gpu/drm/i915/intel_pm.c | 14 +++++++-------
> >  1 file changed, 7 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index 166ae51f5a5b..565f78d6a21d 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -57,7 +57,7 @@ static void gen9_init_clock_gating(struct drm_device *dev)
> >  	struct drm_i915_private *dev_priv = dev->dev_private;
> >  
> >  	/* WaEnableLbsSlaRetryTimerDecrement:skl */
> > -	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
> > +	I915_WRITE(BDW_SCRATCH1,
> >  		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
> >  }
> >  
> > @@ -72,18 +72,18 @@ static void skl_init_clock_gating(struct drm_device *dev)
> >  		 * WaDisableSDEUnitClockGating:skl
> >  		 * WaSetGAPSunitClckGateDisable:skl
> >  		 */
> > -		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
> > +		I915_WRITE(GEN8_UCGCTL6,
> >  			   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |
> >  			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
> >  
> >  		/* WaDisableVFUnitClockGating:skl */
> > -		I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) |
> > +		I915_WRITE(GEN6_UCGCTL2,
> >  			   GEN6_VFUNIT_CLOCK_GATE_DISABLE);
> >  	}
> >  
> >  	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
> >  		/* WaDisableHDCInvalidation:skl */
> > -		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
> > +		I915_WRITE(GAM_ECOCHK,
> >  			   BDW_DISABLE_HDC_INVALIDATION);
> >  
> >  		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
> > @@ -93,7 +93,7 @@ static void skl_init_clock_gating(struct drm_device *dev)
> >  
> >  	if (INTEL_REVID(dev) <= SKL_REVID_E0)
> >  		/* WaDisableLSQCROPERFforOCL:skl */
> > -		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
> > +		I915_WRITE(GEN8_L3SQCREG4,
> >  			   GEN8_LQSC_RO_PERF_DIS);
> >  }
> >  
> > @@ -109,12 +109,12 @@ static void bxt_init_clock_gating(struct drm_device *dev)
> >  	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
> >  	 */
> >  	 /* WaDisableSDEUnitClockGating:bxt */
> > -	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
> > +	I915_WRITE(GEN8_UCGCTL6,
> >  		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
> >  		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
> >  
> >  	/* FIXME: apply on A0 only */
> > -	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
> > +	I915_WRITE(TILECTL, TILECTL_TLBPF);
> >  }
> >  
> >  static void i915_pineview_get_mem_freq(struct drm_device *dev)
> > -- 
> > 2.1.4
> >
Daniel Vetter July 6, 2015, 3 p.m. UTC | #5
On Mon, Jul 06, 2015 at 03:07:16PM +0100, Dave Gordon wrote:
> On 06/07/15 13:50, Ville Syrjälä wrote:
> >On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
> >>Especially for workarounds which is stuff that's almost impossible to
> >>verify: The initial state from the firmware on boot-up and after
> >>resume could be different, which will hide bugs when we do an RMW
> >>cycle.
> >
> >If you're really worried about that then we should then explicitly
> >initialize all the registers that might affect stuff.
> >
> >For a bunch of GT registers we could just do a GPU reset at driver
> >init. That that won't help with UCGCTL and such.
> >
> >I'm also worried that if we don't use RMWs for early parts, the hardware
> >folks may still change the default for some ofhte other bits, and then
> >we end up clobbering those.
> 
> In other drivers, I've found a good pattern to be:
> 
> 1.	during driver load, snapshot (just once) anything that the BIOS
> 	may have programmed that we may need later
> 2.	then reset the h/w and reprogram it totally to our preferred
> 	values, which may to a greater or lesser degree be derived from
> 	the saved BIOS settings
> 3.	during unload, reset the h/w again and reprogram it with the
> 	BIOS settings
> 
> 2a.	resume is just like load, except we don't need or want to
> 	capture the BIOS settings first
> 3a.	suspend is like unload, except in some cases the BIOS values
> 	might need to be tweaked when writing them back in order to
> 	ensure the device doesn't generate further activity.
> 
> Step 1 ensures that we don't lose useful settings where the BIOS knows
> better than the driver what the good values are.
> Step 2/2a. ensures that the device starts out in a well-defined state,
> regardless of how comprehensively (or badly) the BIOS has set it up.
> It seems a reasonable compromise between taking advantage of good BIOSes
> while not getting too much hassle from broken ones.

We've had large-scale save/restore hooks and they're another kind of
trouble: It usually means you end up duplicating between how you touch
registers at runtime and the giant resume code. And there's always going
to be some overlap (our hw engineers are creative enough for that) which
means if there's subtle ordering rules those two paths inevitably will
diverge.
-Daniel
Daniel Vetter July 6, 2015, 3:04 p.m. UTC | #6
On Mon, Jul 06, 2015 at 03:50:49PM +0300, Ville Syrjälä wrote:
> On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
> > Especially for workarounds which is stuff that's almost impossible to
> > verify: The initial state from the firmware on boot-up and after
> > resume could be different, which will hide bugs when we do an RMW
> > cycle.
> 
> If you're really worried about that then we should then explicitly
> initialize all the registers that might affect stuff.
> 
> For a bunch of GT registers we could just do a GPU reset at driver
> init. That that won't help with UCGCTL and such.
> 
> I'm also worried that if we don't use RMWs for early parts, the hardware
> folks may still change the default for some ofhte other bits, and then
> we end up clobbering those.

The point is that we'll at least consistently clobber them, which is the
important part. Chasing a bug which only happens when you freshly boot but
not after the first gpu reset (or first resume or the other way round or
whatever) is not fun at all.

If that means we will botch the context a bit then I guess we need better
tooling to compare the actual hw state with what Bspec suggest, including
all the w/a.
-Daniel
Lespiau, Damien July 6, 2015, 3:15 p.m. UTC | #7
On Mon, Jul 06, 2015 at 04:58:19PM +0200, Daniel Vetter wrote:
> On Mon, Jul 06, 2015 at 01:46:19PM +0100, Damien Lespiau wrote:
> > On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
> > > Especially for workarounds which is stuff that's almost impossible to
> > > verify: The initial state from the firmware on boot-up and after
> > > resume could be different, which will hide bugs when we do an RMW
> > > cycle.
> > > 
> > > Hence never do them, and if it's required we need a special mask.
> > > 
> > > Cc: Damien Lespiau <damien.lespiau@intel.com>
> > > Cc: Imre Deak <imre.deak@intel.com>
> > > Cc: Nick Hoath <nicholas.hoath@intel.com>
> > > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > 
> > Eeek. Let's take the problem the other way around: have you verified
> > it's OK to zero all those other fields?
> 
> Nope, but it's what we do for other workarounds (e.g. the ones we load
> through the rings except for one case in the cxt switch wa) and on other
> platforms. And in general we've moved away from RMW wherever we can since
> it had too much surprises.

I don't think that's really fair. Most W/As through the rings are
touching masked registers, just setting/clearing specific bits.

I just looked at the *_init_clock_gating() function and it's full
of RMW cycles as well. We roughly have no idea of most of the early init
from the firmware and really want to reuse those when we're missing the
info about those bits.

> It's really just something I spotted while stumbling over a w/a patch for
> hsw that we never merged - I don't like the inconsistency. And it has
> bitten us in the past.
> 
> And yes I haven't done the audit here, but the fact that you suggest we
> need one kind proves my point ;-)

I don't mind the spririt of this, but it requires a massive amount of
lore that is currently done in the firmware. Not at all practical with
the amount of knowledge we have on low level units and early init
sequences and W/As.
Daniel Vetter July 6, 2015, 6:32 p.m. UTC | #8
On Mon, Jul 06, 2015 at 04:15:25PM +0100, Damien Lespiau wrote:
> On Mon, Jul 06, 2015 at 04:58:19PM +0200, Daniel Vetter wrote:
> > On Mon, Jul 06, 2015 at 01:46:19PM +0100, Damien Lespiau wrote:
> > > On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
> > > > Especially for workarounds which is stuff that's almost impossible to
> > > > verify: The initial state from the firmware on boot-up and after
> > > > resume could be different, which will hide bugs when we do an RMW
> > > > cycle.
> > > > 
> > > > Hence never do them, and if it's required we need a special mask.
> > > > 
> > > > Cc: Damien Lespiau <damien.lespiau@intel.com>
> > > > Cc: Imre Deak <imre.deak@intel.com>
> > > > Cc: Nick Hoath <nicholas.hoath@intel.com>
> > > > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > > 
> > > Eeek. Let's take the problem the other way around: have you verified
> > > it's OK to zero all those other fields?
> > 
> > Nope, but it's what we do for other workarounds (e.g. the ones we load
> > through the rings except for one case in the cxt switch wa) and on other
> > platforms. And in general we've moved away from RMW wherever we can since
> > it had too much surprises.
> 
> I don't think that's really fair. Most W/As through the rings are
> touching masked registers, just setting/clearing specific bits.
> 
> I just looked at the *_init_clock_gating() function and it's full
> of RMW cycles as well. We roughly have no idea of most of the early init
> from the firmware and really want to reuse those when we're missing the
> info about those bits.
> 
> > It's really just something I spotted while stumbling over a w/a patch for
> > hsw that we never merged - I don't like the inconsistency. And it has
> > bitten us in the past.
> > 
> > And yes I haven't done the audit here, but the fact that you suggest we
> > need one kind proves my point ;-)
> 
> I don't mind the spririt of this, but it requires a massive amount of
> lore that is currently done in the firmware. Not at all practical with
> the amount of knowledge we have on low level units and early init
> sequences and W/As.

Yeah I think I checked a biased sample for this case. Specically I ended
up looking at GEN6_UCGCTL2 where all pre-gen9 functions don't do a RMW.
But reviewing a lot more of the more modern clock gating code we seem to
be simply inconsistent all across the place. I guess if someone would be
really bored we could go through all modern-ish w/a and check that all
platforms apply them in a uniform way, instead of the current MO where we
walk the wa db mostly on a per-platform query. I'm just really grumpy
about w/a in general since we have only shitty options to validate that we
have them all, so the best we can aim for is consistency, which we don't
have either.

I guess everyone just move on and hope nothing breaks is the right option
:(
-Daniel
Paulo Zanoni July 6, 2015, 7:23 p.m. UTC | #9
2015-07-06 12:04 GMT-03:00 Daniel Vetter <daniel@ffwll.ch>:
> On Mon, Jul 06, 2015 at 03:50:49PM +0300, Ville Syrjälä wrote:
>> On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
>> > Especially for workarounds which is stuff that's almost impossible to
>> > verify: The initial state from the firmware on boot-up and after
>> > resume could be different, which will hide bugs when we do an RMW
>> > cycle.
>>
>> If you're really worried about that then we should then explicitly
>> initialize all the registers that might affect stuff.
>>
>> For a bunch of GT registers we could just do a GPU reset at driver
>> init. That that won't help with UCGCTL and such.
>>
>> I'm also worried that if we don't use RMWs for early parts, the hardware
>> folks may still change the default for some ofhte other bits, and then
>> we end up clobbering those.
>
> The point is that we'll at least consistently clobber them, which is the
> important part. Chasing a bug which only happens when you freshly boot but
> not after the first gpu reset (or first resume or the other way round or
> whatever) is not fun at all.

I think there are possibly other ways to be consistent. We could, for
example, save the values which we think are correct at boot - even if
we rely on the BIOS - and then (optionally) check them at every resume
or runtime resume. Maybe there are even other ideas for that.

But I do see the value in what you're doing, even though I'm also
afraid of the possible bugs brought by it, and I like the idea of
starting this with new gens only. If you decide to keep this strategy,
can you please print on debugfs when the RMW value is different from
the non-RMW value?

>
> If that means we will botch the context a bit then I guess we need better
> tooling to compare the actual hw state with what Bspec suggest, including
> all the w/a.
> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter July 6, 2015, 9:35 p.m. UTC | #10
On Mon, Jul 06, 2015 at 04:23:23PM -0300, Paulo Zanoni wrote:
> 2015-07-06 12:04 GMT-03:00 Daniel Vetter <daniel@ffwll.ch>:
> > On Mon, Jul 06, 2015 at 03:50:49PM +0300, Ville Syrjälä wrote:
> >> On Mon, Jul 06, 2015 at 02:42:02PM +0200, Daniel Vetter wrote:
> >> > Especially for workarounds which is stuff that's almost impossible to
> >> > verify: The initial state from the firmware on boot-up and after
> >> > resume could be different, which will hide bugs when we do an RMW
> >> > cycle.
> >>
> >> If you're really worried about that then we should then explicitly
> >> initialize all the registers that might affect stuff.
> >>
> >> For a bunch of GT registers we could just do a GPU reset at driver
> >> init. That that won't help with UCGCTL and such.
> >>
> >> I'm also worried that if we don't use RMWs for early parts, the hardware
> >> folks may still change the default for some ofhte other bits, and then
> >> we end up clobbering those.
> >
> > The point is that we'll at least consistently clobber them, which is the
> > important part. Chasing a bug which only happens when you freshly boot but
> > not after the first gpu reset (or first resume or the other way round or
> > whatever) is not fun at all.
> 
> I think there are possibly other ways to be consistent. We could, for
> example, save the values which we think are correct at boot - even if
> we rely on the BIOS - and then (optionally) check them at every resume
> or runtime resume. Maybe there are even other ideas for that.
> 
> But I do see the value in what you're doing, even though I'm also
> afraid of the possible bugs brought by it, and I like the idea of
> starting this with new gens only. If you decide to keep this strategy,
> can you please print on debugfs when the RMW value is different from
> the non-RMW value?

We have a few testcases somewhere which try to make sure the w/a stick
after the usual suspects. Unfortunately the kernel instrumentation is very
weak for that (atm it only covers the render ring init w/a we do). We
could try to extend that a bit.

Or we could start screaming into dmesg if the rmw has an effect, like
you're suggesting.
-Daneil
Shuang He July 7, 2015, 2:24 p.m. UTC | #11
Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6729
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
ILK                 -1              302/302              301/302
SNB                                  312/316              312/316
IVB                                  345/345              345/345
BYT                 -3              289/289              286/289
HSW                                  382/382              382/382
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*ILK  igt@gem_reloc_vs_gpu@forked-interruptible-thrashing      PASS(1)      DMESG_WARN(1)
(dmesg patch applied)drm:i915_hangcheck_elapsed[i915]]*ERROR*Hangcheck_timer_elapsed...bsd_ring_idle@Hangcheck timer elapsed... bsd ring idle
*BYT  igt@gem_partial_pwrite_pread@reads      PASS(1)      FAIL(1)
*BYT  igt@gem_partial_pwrite_pread@reads-display      PASS(1)      FAIL(1)
*BYT  igt@gem_partial_pwrite_pread@reads-uncached      PASS(1)      FAIL(1)
Note: You need to pay more attention to line start with '*'
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 166ae51f5a5b..565f78d6a21d 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -57,7 +57,7 @@  static void gen9_init_clock_gating(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* WaEnableLbsSlaRetryTimerDecrement:skl */
-	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
+	I915_WRITE(BDW_SCRATCH1,
 		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 }
 
@@ -72,18 +72,18 @@  static void skl_init_clock_gating(struct drm_device *dev)
 		 * WaDisableSDEUnitClockGating:skl
 		 * WaSetGAPSunitClckGateDisable:skl
 		 */
-		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+		I915_WRITE(GEN8_UCGCTL6,
 			   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE |
 			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
 		/* WaDisableVFUnitClockGating:skl */
-		I915_WRITE(GEN6_UCGCTL2, I915_READ(GEN6_UCGCTL2) |
+		I915_WRITE(GEN6_UCGCTL2,
 			   GEN6_VFUNIT_CLOCK_GATE_DISABLE);
 	}
 
 	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
 		/* WaDisableHDCInvalidation:skl */
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+		I915_WRITE(GAM_ECOCHK,
 			   BDW_DISABLE_HDC_INVALIDATION);
 
 		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
@@ -93,7 +93,7 @@  static void skl_init_clock_gating(struct drm_device *dev)
 
 	if (INTEL_REVID(dev) <= SKL_REVID_E0)
 		/* WaDisableLSQCROPERFforOCL:skl */
-		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
+		I915_WRITE(GEN8_L3SQCREG4,
 			   GEN8_LQSC_RO_PERF_DIS);
 }
 
@@ -109,12 +109,12 @@  static void bxt_init_clock_gating(struct drm_device *dev)
 	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
 	 */
 	 /* WaDisableSDEUnitClockGating:bxt */
-	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
+	I915_WRITE(GEN8_UCGCTL6,
 		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE |
 		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
 
 	/* FIXME: apply on A0 only */
-	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
+	I915_WRITE(TILECTL, TILECTL_TLBPF);
 }
 
 static void i915_pineview_get_mem_freq(struct drm_device *dev)