diff mbox

[06/11] drm/i915: Save all MMIO WAs and apply them at a later time

Message ID 1507745721-4094-7-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com Oct. 11, 2017, 6:15 p.m. UTC
By doing this, we can dump these workarounds in debugfs for validation (which,
at the moment, we are only able to do for the contexts WAs).

v2:
  - Wrong macro used for MMIO set bit masked
  - Improved naming
  - Rebased

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c          |   5 +
 drivers/gpu/drm/i915/i915_drv.h          |   8 +-
 drivers/gpu/drm/i915/i915_reg.h          |   1 +
 drivers/gpu/drm/i915/intel_workarounds.c | 382 +++++++++++++++++--------------
 drivers/gpu/drm/i915/intel_workarounds.h |   1 +
 5 files changed, 223 insertions(+), 174 deletions(-)

Comments

Joonas Lahtinen Oct. 12, 2017, 10:35 a.m. UTC | #1
On Wed, 2017-10-11 at 11:15 -0700, Oscar Mateo wrote:
> By doing this, we can dump these workarounds in debugfs for
> validation (which,
> at the moment, we are only able to do for the contexts WAs).
> 
> v2:
>   - Wrong macro used for MMIO set bit masked
>   - Improved naming
>   - Rebased
> 
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>

<SNIP>

> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1960,12 +1960,16 @@ struct i915_wa_reg {
>  	u32 mask;
>  };
>  
> -#define I915_MAX_WA_REGS 16
> +#define I915_MAX_CTX_WA_REGS 16
> +#define I915_MAX_MMIO_WA_REGS 32
>  
>  struct i915_workarounds {
> -	struct i915_wa_reg ctx_wa_reg[I915_MAX_WA_REGS];
> +	struct i915_wa_reg ctx_wa_reg[I915_MAX_CTX_WA_REGS];
>  	u32 ctx_wa_count;
>  
> +	struct i915_wa_reg mmio_wa_reg[I915_MAX_MMIO_WA_REGS];
> +	u32 mmio_wa_count;
> +
>  	u32 hw_whitelist_count[I915_NUM_ENGINES];
>  };

Could we instead consider a constant structure with platform bitmasks?
If that's not dynamic enough, then a bitmap which is initialized by the
platform bitmasks as a default. So instead of running code to add to
list, make it bit more declarative. Pseudo-code;


struct i915_workaround {
	u16 gen_mask;
	enum {
		I915_WA_CTX,
		I915_WA_MMIO,
		I915_WA_WHITELIST,
	} type;
	u32 reg;
}

... elsewhere in .c file

static const struct i915_workaround i915_workarounds[] = { {
	/* WaSomethingSomewhereUMDFoo:skl */
	.gen_mask = INTEL_GEN_MASK(X, Y),
	.type = I915_WA_CTX,
	.reg = ...
} };

Regards, Joonas
oscar.mateo@intel.com Oct. 13, 2017, 8:49 p.m. UTC | #2
On 10/12/2017 03:35 AM, Joonas Lahtinen wrote:
> On Wed, 2017-10-11 at 11:15 -0700, Oscar Mateo wrote:
>> By doing this, we can dump these workarounds in debugfs for
>> validation (which,
>> at the moment, we are only able to do for the contexts WAs).
>>
>> v2:
>>    - Wrong macro used for MMIO set bit masked
>>    - Improved naming
>>    - Rebased
>>
>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> <SNIP>
>
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1960,12 +1960,16 @@ struct i915_wa_reg {
>>   	u32 mask;
>>   };
>>   
>> -#define I915_MAX_WA_REGS 16
>> +#define I915_MAX_CTX_WA_REGS 16
>> +#define I915_MAX_MMIO_WA_REGS 32
>>   
>>   struct i915_workarounds {
>> -	struct i915_wa_reg ctx_wa_reg[I915_MAX_WA_REGS];
>> +	struct i915_wa_reg ctx_wa_reg[I915_MAX_CTX_WA_REGS];
>>   	u32 ctx_wa_count;
>>   
>> +	struct i915_wa_reg mmio_wa_reg[I915_MAX_MMIO_WA_REGS];
>> +	u32 mmio_wa_count;
>> +
>>   	u32 hw_whitelist_count[I915_NUM_ENGINES];
>>   };
> Could we instead consider a constant structure with platform bitmasks?
> If that's not dynamic enough, then a bitmap which is initialized by the
> platform bitmasks as a default. So instead of running code to add to
> list, make it bit more declarative. Pseudo-code;
>
>
> struct i915_workaround {
> 	u16 gen_mask;
> 	enum {
> 		I915_WA_CTX,
> 		I915_WA_MMIO,
> 		I915_WA_WHITELIST,
> 	} type;
> 	u32 reg;
> }
>
> ... elsewhere in .c file
>
> static const struct i915_workaround i915_workarounds[] = { {
> 	/* WaSomethingSomewhereUMDFoo:skl */
> 	.gen_mask = INTEL_GEN_MASK(X, Y),
> 	.type = I915_WA_CTX,
> 	.reg = ...
> } };
>
> Regards, Joonas

I considered it, but we have some workarounds that are even more dynamic 
than that. E.g.:

* WaCompressedResourceSamplerPbeMediaNewHashMode depends on 
HAS_LLC(dev_priv)
* skl_tune_iz_hashing depends on number of subslices (although maybe 
this is not technically a WA?)
* WaGttCachingOffByDefault needs HAS_PAGE_SIZES(dev_priv, 
I915_GTT_PAGE_SIZE_2M)
* WaPsrDPRSUnmaskVBlankInSRD is applied for_each_pipe
* Wa #1181 needs HAS_PCH_CNP(dev_priv)
* ...

We even have a WA (WaTempDisableDOPClkGating) where the new design is 
not dynamic enough :(

I guess we could add a callback pointer to the table for those WAs that 
need extra information. Maybe even a "pre" and a "post" callback 
pointers (to cover that pesky WaTempDisableDOPClkGating).

If this is where we want to go, I can write a patch, but I believe it 
would be better to land this first (code review is critical for these 
kind of changes, and it is easier to first review that all WAs make it 
to i915_workarounds.c correctly, and then review that they are all 
transformed to a static table correctly).

-- Oscar
Joonas Lahtinen Oct. 16, 2017, 10:34 a.m. UTC | #3
On Fri, 2017-10-13 at 13:49 -0700, Oscar Mateo wrote:
> 
> On 10/12/2017 03:35 AM, Joonas Lahtinen wrote:
> > On Wed, 2017-10-11 at 11:15 -0700, Oscar Mateo wrote:
> > > By doing this, we can dump these workarounds in debugfs for
> > > validation (which,
> > > at the moment, we are only able to do for the contexts WAs).
> > > 
> > > v2:
> > >    - Wrong macro used for MMIO set bit masked
> > >    - Improved naming
> > >    - Rebased
> > > 
> > > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > 
> > <SNIP>
> > 
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -1960,12 +1960,16 @@ struct i915_wa_reg {
> > >   	u32 mask;
> > >   };
> > >   
> > > -#define I915_MAX_WA_REGS 16
> > > +#define I915_MAX_CTX_WA_REGS 16
> > > +#define I915_MAX_MMIO_WA_REGS 32
> > >   
> > >   struct i915_workarounds {
> > > -	struct i915_wa_reg ctx_wa_reg[I915_MAX_WA_REGS];
> > > +	struct i915_wa_reg ctx_wa_reg[I915_MAX_CTX_WA_REGS];
> > >   	u32 ctx_wa_count;
> > >   
> > > +	struct i915_wa_reg mmio_wa_reg[I915_MAX_MMIO_WA_REGS];
> > > +	u32 mmio_wa_count;
> > > +
> > >   	u32 hw_whitelist_count[I915_NUM_ENGINES];
> > >   };
> > 
> > Could we instead consider a constant structure with platform bitmasks?
> > If that's not dynamic enough, then a bitmap which is initialized by the
> > platform bitmasks as a default. So instead of running code to add to
> > list, make it bit more declarative. Pseudo-code;
> > 
> > 
> > struct i915_workaround {
> > 	u16 gen_mask;
> > 	enum {
> > 		I915_WA_CTX,
> > 		I915_WA_MMIO,
> > 		I915_WA_WHITELIST,
> > 	} type;
> > 	u32 reg;
> > }
> > 
> > ... elsewhere in .c file
> > 
> > static const struct i915_workaround i915_workarounds[] = { {
> > 	/* WaSomethingSomewhereUMDFoo:skl */
> > 	.gen_mask = INTEL_GEN_MASK(X, Y),
> > 	.type = I915_WA_CTX,
> > 	.reg = ...
> > } };
> > 
> > Regards, Joonas
> 
> I considered it, but we have some workarounds that are even more dynamic 
> than that. E.g.:
> 
> * WaCompressedResourceSamplerPbeMediaNewHashMode depends on 
> HAS_LLC(dev_priv)
> * skl_tune_iz_hashing depends on number of subslices (although maybe 
> this is not technically a WA?)
> * WaGttCachingOffByDefault needs HAS_PAGE_SIZES(dev_priv, 
> I915_GTT_PAGE_SIZE_2M)
> * WaPsrDPRSUnmaskVBlankInSRD is applied for_each_pipe

Could be multiple Wa lines each with HAS_PIPE() condition.

> * Wa #1181 needs HAS_PCH_CNP(dev_priv)
> * ...
> 
> We even have a WA (WaTempDisableDOPClkGating) where the new design is 
> not dynamic enough :(

I was thinking the array would cover the simple register writes, I
think most of the detection problems could be covered by a simple probe
function. One could consider caching the probing result to a bitmap.

> I guess we could add a callback pointer to the table for those WAs that 
> need extra information. Maybe even a "pre" and a "post" callback 
> pointers (to cover that pesky WaTempDisableDOPClkGating).

You'd still have to keep some state between pre and post. Maybe just
have I915_WA_FUNC and then a callback to apply the ones not fitting the
"detection" + "simple register write" scheme.

> If this is where we want to go, I can write a patch, but I believe it 
> would be better to land this first (code review is critical for these 
> kind of changes, and it is easier to first review that all WAs make it 
> to i915_workarounds.c correctly, and then review that they are all 
> transformed to a static table correctly).

First collecting the WA code to same source file is a good idea if that
can be made conveniently, but I would not transform them to arrays or
some other intermediate form like this patch proposes. Instead after
the initial code motion, convert straight to the agreed format.

This is a good clarification to the WAs, so I like the general idea.

Regards, Joonas
oscar.mateo@intel.com Nov. 3, 2017, 10:56 p.m. UTC | #4
On 10/16/2017 03:34 AM, Joonas Lahtinen wrote:
> On Fri, 2017-10-13 at 13:49 -0700, Oscar Mateo wrote:
>> On 10/12/2017 03:35 AM, Joonas Lahtinen wrote:
>>> On Wed, 2017-10-11 at 11:15 -0700, Oscar Mateo wrote:
>>>> By doing this, we can dump these workarounds in debugfs for
>>>> validation (which,
>>>> at the moment, we are only able to do for the contexts WAs).
>>>>
>>>> v2:
>>>>     - Wrong macro used for MMIO set bit masked
>>>>     - Improved naming
>>>>     - Rebased
>>>>
>>>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>>> <SNIP>
>>>
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -1960,12 +1960,16 @@ struct i915_wa_reg {
>>>>    	u32 mask;
>>>>    };
>>>>    
>>>> -#define I915_MAX_WA_REGS 16
>>>> +#define I915_MAX_CTX_WA_REGS 16
>>>> +#define I915_MAX_MMIO_WA_REGS 32
>>>>    
>>>>    struct i915_workarounds {
>>>> -	struct i915_wa_reg ctx_wa_reg[I915_MAX_WA_REGS];
>>>> +	struct i915_wa_reg ctx_wa_reg[I915_MAX_CTX_WA_REGS];
>>>>    	u32 ctx_wa_count;
>>>>    
>>>> +	struct i915_wa_reg mmio_wa_reg[I915_MAX_MMIO_WA_REGS];
>>>> +	u32 mmio_wa_count;
>>>> +
>>>>    	u32 hw_whitelist_count[I915_NUM_ENGINES];
>>>>    };
>>> Could we instead consider a constant structure with platform bitmasks?
>>> If that's not dynamic enough, then a bitmap which is initialized by the
>>> platform bitmasks as a default. So instead of running code to add to
>>> list, make it bit more declarative. Pseudo-code;
>>>
>>>
>>> struct i915_workaround {
>>> 	u16 gen_mask;
>>> 	enum {
>>> 		I915_WA_CTX,
>>> 		I915_WA_MMIO,
>>> 		I915_WA_WHITELIST,
>>> 	} type;
>>> 	u32 reg;
>>> }
>>>
>>> ... elsewhere in .c file
>>>
>>> static const struct i915_workaround i915_workarounds[] = { {
>>> 	/* WaSomethingSomewhereUMDFoo:skl */
>>> 	.gen_mask = INTEL_GEN_MASK(X, Y),
>>> 	.type = I915_WA_CTX,
>>> 	.reg = ...
>>> } };
>>>
>>> Regards, Joonas
>> I considered it, but we have some workarounds that are even more dynamic
>> than that. E.g.:
>>
>> * WaCompressedResourceSamplerPbeMediaNewHashMode depends on
>> HAS_LLC(dev_priv)
>> * skl_tune_iz_hashing depends on number of subslices (although maybe
>> this is not technically a WA?)
>> * WaGttCachingOffByDefault needs HAS_PAGE_SIZES(dev_priv,
>> I915_GTT_PAGE_SIZE_2M)
>> * WaPsrDPRSUnmaskVBlankInSRD is applied for_each_pipe
> Could be multiple Wa lines each with HAS_PIPE() condition.
>
>> * Wa #1181 needs HAS_PCH_CNP(dev_priv)
>> * ...
>>
>> We even have a WA (WaTempDisableDOPClkGating) where the new design is
>> not dynamic enough :(
> I was thinking the array would cover the simple register writes, I
> think most of the detection problems could be covered by a simple probe
> function. One could consider caching the probing result to a bitmap.
>
>> I guess we could add a callback pointer to the table for those WAs that
>> need extra information. Maybe even a "pre" and a "post" callback
>> pointers (to cover that pesky WaTempDisableDOPClkGating).
> You'd still have to keep some state between pre and post. Maybe just
> have I915_WA_FUNC and then a callback to apply the ones not fitting the
> "detection" + "simple register write" scheme.
>

It took me some time, but I found the flaw in this design: I cannot use 
a callback to apply the ones not fitting the"detection" + "simple 
register write" scheme as you mention, because then debugfs will not 
know about those (which was the point of this whole exercise).

I tried something like this:

typedef bool (* wa_pre_hook_func)(struct drm_i915_private *dev_priv,
                   const struct i915_wa_reg *wa,
                   u32 *mask, u32 *value, u32 *data);
typedef void (* wa_post_hook_func)(struct drm_i915_private *dev_priv,
                    const struct i915_wa_reg *wa,
                    u32 data);

In case mask/value need to be recalculated (e.g. skl_tune_iz_hashing or 
WaGttCachingOffByDefault) but then I need to call the pre_hook on 
debugfs, which I might not want to do in all cases (see 
WaProgramL3SqcReg1Default).
I can special-case the problematic WAs, or even left them out of the 
array, but somehow that seems worse than what we already had...

Thoughts?

>> If this is where we want to go, I can write a patch, but I believe it
>> would be better to land this first (code review is critical for these
>> kind of changes, and it is easier to first review that all WAs make it
>> to i915_workarounds.c correctly, and then review that they are all
>> transformed to a static table correctly).
> First collecting the WA code to same source file is a good idea if that
> can be made conveniently, but I would not transform them to arrays or
> some other intermediate form like this patch proposes. Instead after
> the initial code motion, convert straight to the agreed format.
>
> This is a good clarification to the WAs, so I like the general idea.
>
> Regards, Joonas
Joonas Lahtinen Nov. 8, 2017, 11:47 a.m. UTC | #5
On Fri, 2017-11-03 at 15:56 -0700, Oscar Mateo wrote:
> 
> On 10/16/2017 03:34 AM, Joonas Lahtinen wrote:
> > On Fri, 2017-10-13 at 13:49 -0700, Oscar Mateo wrote:
> > > On 10/12/2017 03:35 AM, Joonas Lahtinen wrote:
> > > > On Wed, 2017-10-11 at 11:15 -0700, Oscar Mateo wrote:
> > > > > By doing this, we can dump these workarounds in debugfs for
> > > > > validation (which,
> > > > > at the moment, we are only able to do for the contexts WAs).
> > > > > 
> > > > > v2:
> > > > >     - Wrong macro used for MMIO set bit masked
> > > > >     - Improved naming
> > > > >     - Rebased
> > > > > 
> > > > > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> > > > > Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > > > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > > > 
> > > > <SNIP>
> > > > 
> > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > > @@ -1960,12 +1960,16 @@ struct i915_wa_reg {
> > > > >    	u32 mask;
> > > > >    };
> > > > >    
> > > > > -#define I915_MAX_WA_REGS 16
> > > > > +#define I915_MAX_CTX_WA_REGS 16
> > > > > +#define I915_MAX_MMIO_WA_REGS 32
> > > > >    
> > > > >    struct i915_workarounds {
> > > > > -	struct i915_wa_reg ctx_wa_reg[I915_MAX_WA_REGS];
> > > > > +	struct i915_wa_reg ctx_wa_reg[I915_MAX_CTX_WA_REGS];
> > > > >    	u32 ctx_wa_count;
> > > > >    
> > > > > +	struct i915_wa_reg mmio_wa_reg[I915_MAX_MMIO_WA_REGS];
> > > > > +	u32 mmio_wa_count;
> > > > > +
> > > > >    	u32 hw_whitelist_count[I915_NUM_ENGINES];
> > > > >    };
> > > > 
> > > > Could we instead consider a constant structure with platform bitmasks?
> > > > If that's not dynamic enough, then a bitmap which is initialized by the
> > > > platform bitmasks as a default. So instead of running code to add to
> > > > list, make it bit more declarative. Pseudo-code;
> > > > 
> > > > 
> > > > struct i915_workaround {
> > > > 	u16 gen_mask;
> > > > 	enum {
> > > > 		I915_WA_CTX,
> > > > 		I915_WA_MMIO,
> > > > 		I915_WA_WHITELIST,
> > > > 	} type;
> > > > 	u32 reg;
> > > > }
> > > > 
> > > > ... elsewhere in .c file
> > > > 
> > > > static const struct i915_workaround i915_workarounds[] = { {
> > > > 	/* WaSomethingSomewhereUMDFoo:skl */
> > > > 	.gen_mask = INTEL_GEN_MASK(X, Y),
> > > > 	.type = I915_WA_CTX,
> > > > 	.reg = ...
> > > > } };
> > > > 
> > > > Regards, Joonas
> > > 
> > > I considered it, but we have some workarounds that are even more dynamic
> > > than that. E.g.:
> > > 
> > > * WaCompressedResourceSamplerPbeMediaNewHashMode depends on
> > > HAS_LLC(dev_priv)
> > > * skl_tune_iz_hashing depends on number of subslices (although maybe
> > > this is not technically a WA?)
> > > * WaGttCachingOffByDefault needs HAS_PAGE_SIZES(dev_priv,
> > > I915_GTT_PAGE_SIZE_2M)
> > > * WaPsrDPRSUnmaskVBlankInSRD is applied for_each_pipe
> > 
> > Could be multiple Wa lines each with HAS_PIPE() condition.
> > 
> > > * Wa #1181 needs HAS_PCH_CNP(dev_priv)
> > > * ...
> > > 
> > > We even have a WA (WaTempDisableDOPClkGating) where the new design is
> > > not dynamic enough :(
> > 
> > I was thinking the array would cover the simple register writes, I
> > think most of the detection problems could be covered by a simple probe
> > function. One could consider caching the probing result to a bitmap.
> > 
> > > I guess we could add a callback pointer to the table for those WAs that
> > > need extra information. Maybe even a "pre" and a "post" callback
> > > pointers (to cover that pesky WaTempDisableDOPClkGating).
> > 
> > You'd still have to keep some state between pre and post. Maybe just
> > have I915_WA_FUNC and then a callback to apply the ones not fitting the
> > "detection" + "simple register write" scheme.
> > 
> 
> It took me some time, but I found the flaw in this design: I cannot use 
> a callback to apply the ones not fitting the"detection" + "simple 
> register write" scheme as you mention, because then debugfs will not 
> know about those (which was the point of this whole exercise).
> 
> I tried something like this:
> 
> typedef bool (* wa_pre_hook_func)(struct drm_i915_private *dev_priv,
>                    const struct i915_wa_reg *wa,
>                    u32 *mask, u32 *value, u32 *data);
> typedef void (* wa_post_hook_func)(struct drm_i915_private *dev_priv,
>                     const struct i915_wa_reg *wa,
>                     u32 data);
> 
> In case mask/value need to be recalculated (e.g. skl_tune_iz_hashing or 
> WaGttCachingOffByDefault) but then I need to call the pre_hook on 
> debugfs, which I might not want to do in all cases (see 
> WaProgramL3SqcReg1Default).
> I can special-case the problematic WAs, or even left them out of the 
> array, but somehow that seems worse than what we already had...
> 
> Thoughts?

I'm not sure I see the issue. If looking at gen8_set_l3sqc_credits

You'd have the specialized apply() callback which sets the
GEN8_L3SQCREG1 register respecting the WaTempDisableDOPClkGating.

When you have a WA for applying a WA, I don't see how we can completely
avoid the specialized functions.

For debugfs, wouldn't you dump the value of what GEN8_L3SQCREG1 is for
a specific dev_priv? If there's more than that, I guess we can't but
store that state in dev_priv. And have an optional hook function to
fill in whatever depends on runtime and we want displayed in debugfs?

I think it might be best to start with moving all the WAs to a file,
and then continue looking at how we can best present them.

Regards, Joonas

> 
> > > If this is where we want to go, I can write a patch, but I believe it
> > > would be better to land this first (code review is critical for these
> > > kind of changes, and it is easier to first review that all WAs make it
> > > to i915_workarounds.c correctly, and then review that they are all
> > > transformed to a static table correctly).
> > 
> > First collecting the WA code to same source file is a good idea if that
> > can be made conveniently, but I would not transform them to arrays or
> > some other intermediate form like this patch proposes. Instead after
> > the initial code motion, convert straight to the agreed format.
> > 
> > This is a good clarification to the WAs, so I like the general idea.
> > 
> > Regards, Joonas
> 
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f1e6517..6e9a0da 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -49,6 +49,7 @@ 
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
+#include "intel_workarounds.h"
 #include "intel_drv.h"
 #include "intel_uc.h"
 
@@ -886,6 +887,10 @@  static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	BUG_ON(device_info->gen > sizeof(device_info->gen_mask) * BITS_PER_BYTE);
 	device_info->gen_mask = BIT(device_info->gen - 1);
 
+	ret = intel_mmio_workarounds_init(dev_priv);
+	if (ret < 0)
+		return ret;
+
 	spin_lock_init(&dev_priv->irq_lock);
 	spin_lock_init(&dev_priv->gpu_error.lock);
 	mutex_init(&dev_priv->backlight_lock);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a528b0b..11e8658 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1960,12 +1960,16 @@  struct i915_wa_reg {
 	u32 mask;
 };
 
-#define I915_MAX_WA_REGS 16
+#define I915_MAX_CTX_WA_REGS 16
+#define I915_MAX_MMIO_WA_REGS 32
 
 struct i915_workarounds {
-	struct i915_wa_reg ctx_wa_reg[I915_MAX_WA_REGS];
+	struct i915_wa_reg ctx_wa_reg[I915_MAX_CTX_WA_REGS];
 	u32 ctx_wa_count;
 
+	struct i915_wa_reg mmio_wa_reg[I915_MAX_MMIO_WA_REGS];
+	u32 mmio_wa_count;
+
 	u32 hw_whitelist_count[I915_NUM_ENGINES];
 };
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d2d0a83..5858f5f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7041,6 +7041,7 @@  enum {
  */
 #define  L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) << 19)
 #define  L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
+#define  L3_PRIO_CREDITS_MASK			(0x1f << 19) | (0x1f << 14)
 
 #define GEN7_L3CNTLREG1				_MMIO(0xB01C)
 #define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47FF8C
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index c7d2bf9..95a9b75 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -31,7 +31,7 @@  static int ctx_wa_add(struct drm_i915_private *dev_priv,
 {
 	const u32 idx = dev_priv->workarounds.ctx_wa_count;
 
-	if (WARN_ON(idx >= I915_MAX_WA_REGS))
+	if (WARN_ON(idx >= I915_MAX_CTX_WA_REGS))
 		return -ENOSPC;
 
 	dev_priv->workarounds.ctx_wa_reg[idx].addr = addr;
@@ -513,64 +513,97 @@  int intel_ctx_workarounds_emit(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static void bdw_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+static int mmio_wa_add(struct drm_i915_private *dev_priv,
+		       i915_reg_t addr,
+		       const u32 mask, const u32 val)
+{
+	const u32 idx = dev_priv->workarounds.mmio_wa_count;
+
+	if (WARN_ON(idx >= I915_MAX_MMIO_WA_REGS))
+		return -ENOSPC;
+
+	dev_priv->workarounds.mmio_wa_reg[idx].addr = addr;
+	dev_priv->workarounds.mmio_wa_reg[idx].value = val;
+	dev_priv->workarounds.mmio_wa_reg[idx].mask = mask;
+
+	dev_priv->workarounds.mmio_wa_count++;
+
+	return 0;
+}
+
+#define MMIO_WA_REG(addr, mask, val) do { \
+		const int r = mmio_wa_add(dev_priv, (addr), (mask), (val)); \
+		if (r) \
+			return r; \
+	} while (0)
+
+#define MMIO_WA_SET_BIT(addr, mask) \
+	MMIO_WA_REG(addr, (mask), (mask))
+
+#define MMIO_WA_SET_BIT_MASKED(addr, mask) \
+	MMIO_WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
+
+#define MMIO_WA_CLR_BIT(addr, mask) \
+	MMIO_WA_REG(addr, (mask), 0)
+
+#define MMIO_WA_SET_FIELD(addr, mask, value) \
+	MMIO_WA_REG(addr, (mask), (value))
+
+static int bdw_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
 	/* The GTT cache must be disabled if the system is using 2M pages. */
-	bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv,
-						 I915_GTT_PAGE_SIZE_2M);
+	bool can_use_gtt_cache = !HAS_PAGE_SIZES(dev_priv, I915_GTT_PAGE_SIZE_2M);
 	enum pipe pipe;
 
 	/* WaSwitchSolVfFArbitrationPriority:bdw */
-	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
+	MMIO_WA_SET_BIT(GAM_ECOCHK, HSW_ECOCHK_ARB_PRIO_SOL);
 
 	/* WaPsrDPAMaskVBlankInSRD:bdw */
-	I915_WRITE(CHICKEN_PAR1_1,
-		   I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
+	MMIO_WA_SET_BIT(CHICKEN_PAR1_1, DPA_MASK_VBLANK_SRD);
 
 	/* WaPsrDPRSUnmaskVBlankInSRD:bdw */
-	for_each_pipe(dev_priv, pipe) {
-		I915_WRITE(CHICKEN_PIPESL_1(pipe),
-			   I915_READ(CHICKEN_PIPESL_1(pipe)) |
-			   BDW_DPRS_MASK_VBLANK_SRD);
-	}
+	for_each_pipe(dev_priv, pipe)
+		MMIO_WA_SET_BIT(CHICKEN_PIPESL_1(pipe), BDW_DPRS_MASK_VBLANK_SRD);
 
 	/* WaVSRefCountFullforceMissDisable:bdw */
 	/* WaDSRefCountFullforceMissDisable:bdw */
-	I915_WRITE(GEN7_FF_THREAD_MODE,
-		   I915_READ(GEN7_FF_THREAD_MODE) &
-		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+	MMIO_WA_CLR_BIT(GEN7_FF_THREAD_MODE, GEN8_FF_DS_REF_CNT_FFME |
+					     GEN7_FF_VS_REF_CNT_FFME);
 
-	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
-		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
+	MMIO_WA_SET_BIT_MASKED(GEN6_RC_SLEEP_PSMI_CONTROL,
+			       GEN8_RC_SEMA_IDLE_MSG_DISABLE);
 
 	/* WaGttCachingOffByDefault:bdw */
-	I915_WRITE(HSW_GTT_CACHE_EN, can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
+	MMIO_WA_SET_FIELD(HSW_GTT_CACHE_EN, 0xFFFFFFFF,
+			  can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
 
 	/* WaKVMNotificationOnConfigChange:bdw */
-	I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1)
-		   | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
+	MMIO_WA_SET_BIT(CHICKEN_PAR2_1, KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
+
+	return 0;
 }
 
-static void chv_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+static int chv_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
 	/* WaVSRefCountFullforceMissDisable:chv */
 	/* WaDSRefCountFullforceMissDisable:chv */
-	I915_WRITE(GEN7_FF_THREAD_MODE,
-		   I915_READ(GEN7_FF_THREAD_MODE) &
-		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+	MMIO_WA_CLR_BIT(GEN7_FF_THREAD_MODE, GEN8_FF_DS_REF_CNT_FFME |
+					     GEN7_FF_VS_REF_CNT_FFME);
 
 	/* WaDisableSemaphoreAndSyncFlipWait:chv */
-	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
-		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
+	MMIO_WA_SET_BIT_MASKED(GEN6_RC_SLEEP_PSMI_CONTROL,
+			       GEN8_RC_SEMA_IDLE_MSG_DISABLE);
 
 	/*
 	 * GTT cache may not work with big pages, so if those
 	 * are ever enabled GTT cache may need to be disabled.
 	 */
-	I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
+	MMIO_WA_SET_FIELD(HSW_GTT_CACHE_EN, 0xFFFFFFFF, GTT_CACHE_EN_ALL);
+
+	return 0;
 }
 
-static void gen9_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+static int gen9_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
 	if (HAS_LLC(dev_priv)) {
 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
@@ -578,10 +611,7 @@  static void gen9_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
 		 * Must match Display Engine. See
 		 * WaCompressedResourceDisplayNewHashMode.
 		 */
-		I915_WRITE(MMCD_MISC_CTRL,
-			   I915_READ(MMCD_MISC_CTRL) |
-			   MMCD_PCLA |
-			   MMCD_HOTSPOT_EN);
+		MMIO_WA_SET_BIT(MMCD_MISC_CTRL, MMCD_PCLA | MMCD_HOTSPOT_EN);
 
 		/*
 		 * WaCompressedResourceDisplayNewHashMode:skl,kbl
@@ -590,297 +620,305 @@  static void gen9_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
 		 * Must match Sampler, Pixel Back End, and Media. See
 		 * WaCompressedResourceSamplerPbeMediaNewHashMode.
 		 */
-		I915_WRITE(CHICKEN_PAR1_1,
-			   I915_READ(CHICKEN_PAR1_1) |
-			   SKL_DE_COMPRESSED_HASH_MODE);
+		MMIO_WA_SET_BIT(CHICKEN_PAR1_1, SKL_DE_COMPRESSED_HASH_MODE);
 	}
 
 	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
-	I915_WRITE(CHICKEN_PAR1_1,
-		   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
+	MMIO_WA_SET_BIT(CHICKEN_PAR1_1, SKL_EDP_PSR_FIX_RDWRAP);
 
-	I915_WRITE(GEN8_CONFIG0,
-		   I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);
+	MMIO_WA_SET_BIT(GEN8_CONFIG0, GEN9_DEFAULT_FIXES);
 
 	/* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(GEN8_CHICKEN_DCPR_1,
-		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
+	MMIO_WA_SET_BIT(GEN8_CHICKEN_DCPR_1, MASK_WAKEMEM);
 
 	/* WaFbcTurnOffFbcWatermark:skl,bxt,kbl,cfl */
 	/* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
-		   DISP_FBC_WM_DIS |
-		   DISP_FBC_MEMORY_WAKE);
+	MMIO_WA_SET_BIT(DISP_ARB_CTL, DISP_FBC_WM_DIS | DISP_FBC_MEMORY_WAKE);
 
 	/* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */
-	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
-		   ILK_DPFC_DISABLE_DUMMY0);
+	MMIO_WA_SET_BIT(ILK_DPFC_CHICKEN, ILK_DPFC_DISABLE_DUMMY0);
 
 	/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
-		   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
+	MMIO_WA_SET_BIT_MASKED(GEN9_CSFE_CHICKEN1_RCS,
+			       GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
 
 	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
-		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+	MMIO_WA_SET_BIT(BDW_SCRATCH1, GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
 	/* WaDisableKillLogic:bxt,skl,kbl */
 	if (!IS_COFFEELAKE(dev_priv))
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-			   ECOCHK_DIS_TLB);
+		MMIO_WA_SET_BIT(GAM_ECOCHK, ECOCHK_DIS_TLB);
 
 	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
-	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-		   BDW_DISABLE_HDC_INVALIDATION);
+	MMIO_WA_SET_BIT(GAM_ECOCHK, BDW_DISABLE_HDC_INVALIDATION);
 
 	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
-	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
-				    GEN8_LQSC_FLUSH_COHERENT_LINES));
+	MMIO_WA_SET_BIT(GEN8_L3SQCREG4, GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+	MMIO_WA_SET_BIT_MASKED(GEN7_FF_SLICE_CS_CHICKEN1,
+			       GEN9_FFSC_PERCTX_PREEMPT_CTRL);
+
+	return 0;
 }
 
-static void skl_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+static int skl_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
-	gen9_mmio_workarounds_apply(dev_priv);
+	int ret;
+
+	ret = gen9_mmio_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
 
 	/* WaDisableDopClockGating */
-	I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL)
-		   & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+	MMIO_WA_CLR_BIT(GEN7_MISCCPCTL, GEN7_DOP_CLOCK_GATE_ENABLE);
 
 	/* WAC6entrylatency:skl */
-	I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) |
-		   FBC_LLC_FULLY_OPEN);
+	MMIO_WA_SET_BIT(FBC_LLC_READ_CTRL, FBC_LLC_FULLY_OPEN);
 
 	/* WaFbcNukeOnHostModify:skl */
-	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
-		   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
+	MMIO_WA_SET_BIT(ILK_DPFC_CHICKEN, ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
 
 	/* WaEnableGapsTsvCreditFix:skl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
+	MMIO_WA_SET_BIT(GEN8_GARBCNTL, GEN9_GAPS_TSV_CREDIT_DISABLE);
 
 	/* WaDisableGafsUnitClkGating:skl */
-	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
+	MMIO_WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaInPlaceDecompressionHang:skl */
 	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
-		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
+		MMIO_WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
+				GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	return 0;
 }
 
-static void bxt_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+static int bxt_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
-	gen9_mmio_workarounds_apply(dev_priv);
+	int ret;
+
+	ret = gen9_mmio_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
 
 	/* WaDisableSDEUnitClockGating:bxt */
-	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
-		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+	MMIO_WA_SET_BIT(GEN8_UCGCTL6, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
 	/*
 	 * FIXME:
 	 * GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
 	 */
-	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
-		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
+	MMIO_WA_SET_BIT(GEN8_UCGCTL6, GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
 
 	/*
 	 * Wa: Backlight PWM may stop in the asserted state, causing backlight
 	 * to stay fully on.
 	 */
-	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
-		   PWM1_GATING_DIS | PWM2_GATING_DIS);
+	MMIO_WA_SET_BIT(GEN9_CLKGATE_DIS_0, PWM1_GATING_DIS | PWM2_GATING_DIS);
 
 	/* WaStoreMultiplePTEenable:bxt */
 	/* This is a requirement according to Hardware specification */
 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
-		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
+		MMIO_WA_SET_BIT(TILECTL, TILECTL_TLBPF);
 
 	/* WaSetClckGatingDisableMedia:bxt */
 	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) {
-		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
-					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
+		MMIO_WA_CLR_BIT(GEN7_MISCCPCTL, GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE);
 	}
 
 	/* WaDisablePooledEuLoadBalancingFix:bxt */
 	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) {
-		I915_WRITE(FF_SLICE_CS_CHICKEN2,
-			   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
+		MMIO_WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2,
+				       GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
 	}
 
 	/* WaProgramL3SqcReg1DefaultForPerf:bxt */
 	if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER))
-		I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
-					   L3_HIGH_PRIO_CREDITS(2));
+		MMIO_WA_SET_FIELD(GEN8_L3SQCREG1, L3_PRIO_CREDITS_MASK,
+				  L3_GENERAL_PRIO_CREDITS(62) |
+				  L3_HIGH_PRIO_CREDITS(2));
 
 	/* WaInPlaceDecompressionHang:bxt */
 	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
-		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
+		MMIO_WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
+				GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	return 0;
 }
 
-static void kbl_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+static int kbl_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
-	gen9_mmio_workarounds_apply(dev_priv);
+	int ret;
+
+	ret = gen9_mmio_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
 
 	/* WaDisableSDEUnitClockGating:kbl */
 	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
-		I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
-			   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+		MMIO_WA_SET_BIT(GEN8_UCGCTL6, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaDisableGamClockGating:kbl */
 	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
-		I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) |
-			   GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
+		MMIO_WA_SET_BIT(GEN6_UCGCTL1, GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaFbcNukeOnHostModify:kbl */
-	I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) |
-		   ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
+	MMIO_WA_SET_BIT(ILK_DPFC_CHICKEN, ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
 
 	/* WaEnableGapsTsvCreditFix:kbl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
+	MMIO_WA_SET_BIT(GEN8_GARBCNTL, GEN9_GAPS_TSV_CREDIT_DISABLE);
 
 	/* WaDisableDynamicCreditSharing:kbl */
 	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
-		I915_WRITE(GAMT_CHKN_BIT_REG,
-			   (I915_READ(GAMT_CHKN_BIT_REG) |
-			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING));
+		MMIO_WA_SET_BIT(GAMT_CHKN_BIT_REG,
+				GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
 
 	/* WaDisableGafsUnitClkGating:kbl */
-	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
+	MMIO_WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaInPlaceDecompressionHang:kbl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
+	MMIO_WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
+			GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	return 0;
 }
 
-static void glk_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+static int glk_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
-	u32 val;
+	int ret;
 
-	gen9_mmio_workarounds_apply(dev_priv);
+	ret = gen9_mmio_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
 
 	/*
 	 * WaDisablePWMClockGating:glk
 	 * Backlight PWM may stop in the asserted state, causing backlight
 	 * to stay fully on.
 	 */
-	I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
-		   PWM1_GATING_DIS | PWM2_GATING_DIS);
+	MMIO_WA_SET_BIT(GEN9_CLKGATE_DIS_0, PWM1_GATING_DIS | PWM2_GATING_DIS);
 
 	/* WaDDIIOTimeout:glk */
-	if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) {
-		u32 val = I915_READ(CHICKEN_MISC_2);
-		val &= ~(GLK_CL0_PWR_DOWN |
-			 GLK_CL1_PWR_DOWN |
-			 GLK_CL2_PWR_DOWN);
-		I915_WRITE(CHICKEN_MISC_2, val);
-	}
+	if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1))
+		MMIO_WA_CLR_BIT(CHICKEN_MISC_2,
+				GLK_CL0_PWR_DOWN |
+				GLK_CL1_PWR_DOWN |
+				GLK_CL2_PWR_DOWN);
 
 	/* Display WA #1133: WaFbcSkipSegments:glk */
-	val = I915_READ(ILK_DPFC_CHICKEN);
-	val &= ~GLK_SKIP_SEG_COUNT_MASK;
-	val |= GLK_SKIP_SEG_EN | GLK_SKIP_SEG_COUNT(1);
-	I915_WRITE(ILK_DPFC_CHICKEN, val);
+	MMIO_WA_SET_FIELD(ILK_DPFC_CHICKEN, GLK_SKIP_SEG_COUNT_MASK,
+			  GLK_SKIP_SEG_EN | GLK_SKIP_SEG_COUNT(1));
+
+	return 0;
 }
 
-static void cfl_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+static int cfl_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
-	gen9_mmio_workarounds_apply(dev_priv);
+	int ret;
+
+	ret = gen9_mmio_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
 
 	/* WaFbcNukeOnHostModify:cfl */
-	I915_WRITE(ILK_DPFC_CHICKEN,
-		   I915_READ(ILK_DPFC_CHICKEN) |
-			     ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
+	MMIO_WA_SET_BIT(ILK_DPFC_CHICKEN, ILK_DPFC_NUKE_ON_ANY_MODIFICATION);
 
 	/* WaEnableGapsTsvCreditFix:cfl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
+	MMIO_WA_SET_BIT(GEN8_GARBCNTL, GEN9_GAPS_TSV_CREDIT_DISABLE);
 
 	/* WaDisableGafsUnitClkGating:cfl */
-	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
+	MMIO_WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
 
 	/* WaInPlaceDecompressionHang:cfl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
+	MMIO_WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
+			GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	return 0;
 }
 
-static void cnl_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+static int cnl_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
-	u32 val;
-
 	/* This is not an Wa. Enable for better image quality */
-	I915_WRITE(_3D_CHICKEN3,
-		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
+	MMIO_WA_SET_BIT_MASKED(_3D_CHICKEN3, _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
 
 	/* WaEnableChickenDCPR:cnl */
-	I915_WRITE(GEN8_CHICKEN_DCPR_1,
-		   I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
+	MMIO_WA_SET_BIT(GEN8_CHICKEN_DCPR_1, MASK_WAKEMEM);
 
 	/* WaFbcWakeMemOn:cnl */
-	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
-		   DISP_FBC_MEMORY_WAKE);
+	MMIO_WA_SET_BIT(DISP_ARB_CTL, DISP_FBC_MEMORY_WAKE);
 
 	/* WaSarbUnitClockGatingDisable:cnl (pre-prod) */
 	if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0))
-		I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
-			   I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
-			   SARBUNIT_CLKGATE_DIS);
+		MMIO_WA_SET_BIT(SLICE_UNIT_LEVEL_CLKGATE, SARBUNIT_CLKGATE_DIS);
 
 	/* Display WA #1133: WaFbcSkipSegments:cnl */
-	val = I915_READ(ILK_DPFC_CHICKEN);
-	val &= ~GLK_SKIP_SEG_COUNT_MASK;
-	val |= GLK_SKIP_SEG_EN | GLK_SKIP_SEG_COUNT(1);
-	I915_WRITE(ILK_DPFC_CHICKEN, val);
+	MMIO_WA_SET_FIELD(ILK_DPFC_CHICKEN, GLK_SKIP_SEG_COUNT_MASK,
+			  GLK_SKIP_SEG_EN | GLK_SKIP_SEG_COUNT(1));
 
 	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
 	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
-		I915_WRITE(GAMT_CHKN_BIT_REG,
-			   (I915_READ(GAMT_CHKN_BIT_REG) |
-			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
+		MMIO_WA_SET_BIT(GAMT_CHKN_BIT_REG,
+				GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
 
 	/* WaInPlaceDecompressionHang:cnl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
+	MMIO_WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
+			GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 
 	/* WaEnablePreemptionGranularityControlByUMD:cnl */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+	MMIO_WA_SET_BIT_MASKED(GEN7_FF_SLICE_CS_CHICKEN1,
+			       GEN9_FFSC_PERCTX_PREEMPT_CTRL);
+
+	return 0;
 }
 
-void intel_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+int intel_mmio_workarounds_init(struct drm_i915_private *dev_priv)
 {
+	int err;
+
+	dev_priv->workarounds.mmio_wa_count = 0;
+
 	if (INTEL_GEN(dev_priv) < 8)
-		return;
+		err = 0;
 	else if (IS_BROADWELL(dev_priv))
-		bdw_mmio_workarounds_apply(dev_priv);
+		err = bdw_mmio_workarounds_init(dev_priv);
 	else if (IS_CHERRYVIEW(dev_priv))
-		chv_mmio_workarounds_apply(dev_priv);
+		err = chv_mmio_workarounds_init(dev_priv);
 	else if (IS_SKYLAKE(dev_priv))
-		skl_mmio_workarounds_apply(dev_priv);
+		err = skl_mmio_workarounds_init(dev_priv);
 	else if (IS_BROXTON(dev_priv))
-		bxt_mmio_workarounds_apply(dev_priv);
+		err = bxt_mmio_workarounds_init(dev_priv);
 	else if (IS_KABYLAKE(dev_priv))
-		kbl_mmio_workarounds_apply(dev_priv);
+		err = kbl_mmio_workarounds_init(dev_priv);
 	else if (IS_GEMINILAKE(dev_priv))
-		glk_mmio_workarounds_apply(dev_priv);
+		err = glk_mmio_workarounds_init(dev_priv);
 	else if (IS_COFFEELAKE(dev_priv))
-		cfl_mmio_workarounds_apply(dev_priv);
+		err = cfl_mmio_workarounds_init(dev_priv);
 	else if (IS_CANNONLAKE(dev_priv))
-		cnl_mmio_workarounds_apply(dev_priv);
-	else
+		err = cnl_mmio_workarounds_init(dev_priv);
+	else {
 		MISSING_CASE(INTEL_GEN(dev_priv));
+		err = 0;
+	}
+	if (err)
+		return err;
+
+	DRM_DEBUG_DRIVER("Number of MMIO w/a: %d\n",
+			 dev_priv->workarounds.mmio_wa_count);
+	return 0;
+}
+
+void intel_mmio_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	struct i915_workarounds *w = &dev_priv->workarounds;
+	int i;
+
+	for (i = 0; i < w->mmio_wa_count; i++) {
+		i915_reg_t addr = w->mmio_wa_reg[i].addr;
+		u32 value = w->mmio_wa_reg[i].value;
+		u32 mask = w->mmio_wa_reg[i].mask;
+
+		I915_WRITE(addr, (I915_READ(addr) & ~mask) | value);
+	}
 }
 
 static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h
index 53a452a..2f36f27 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/intel_workarounds.h
@@ -28,6 +28,7 @@ 
 int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv);
 int intel_ctx_workarounds_emit(struct drm_i915_gem_request *req);
 
+int intel_mmio_workarounds_init(struct drm_i915_private *dev_priv);
 void intel_mmio_workarounds_apply(struct drm_i915_private *dev_priv);
 
 int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine);