diff mbox

[1/3] drm/i915: Keep the ctx workarounds tightly packed

Message ID 20180615085935.22031-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson June 15, 2018, 8:59 a.m. UTC
For each platform, we have a few registers that are rewritten with multiple
values -- they are not part of a sequence, just different parts of a
masked register set at different times (e.g. platform and gen
workarounds). Consolidate these into a single register write to keep the
table compact.

While adjusting the construction of the wa table, make it non-fatal so
that the driver still loads, but keep the warning and extra details
for inspection.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      | 25 ++--------
 drivers/gpu/drm/i915/i915_drv.h          |  2 +-
 drivers/gpu/drm/i915/intel_workarounds.c | 63 +++++++++++++++++-------
 3 files changed, 52 insertions(+), 38 deletions(-)

Comments

Mika Kuoppala June 15, 2018, 11:36 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> For each platform, we have a few registers that rewritten with multiple
> values -- they are not part of a sequence, just different parts of a
> masked register set at different times (e.g. platform and gen
> workarounds). Consolidate these into a single register write to keep the
> table compact.
>
> While adjusting the construction of the wa table, make it non fatal so
> that the driver still loads but keeping the warning and extra details
> for inspection.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      | 25 ++--------
>  drivers/gpu/drm/i915/i915_drv.h          |  2 +-
>  drivers/gpu/drm/i915/intel_workarounds.c | 63 +++++++++++++++++-------
>  3 files changed, 52 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c600279d3db5..f78895ffab9b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3378,28 +3378,13 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
>  
>  static int i915_wa_registers(struct seq_file *m, void *unused)
>  {
> -	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> -	struct i915_workarounds *workarounds = &dev_priv->workarounds;
> +	struct i915_workarounds *wa = &node_to_i915(m->private)->workarounds;
>  	int i;
>  
> -	intel_runtime_pm_get(dev_priv);
> -
> -	seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
> -	for (i = 0; i < workarounds->count; ++i) {
> -		i915_reg_t addr;
> -		u32 mask, value, read;
> -		bool ok;
> -
> -		addr = workarounds->reg[i].addr;
> -		mask = workarounds->reg[i].mask;
> -		value = workarounds->reg[i].value;
> -		read = I915_READ(addr);
> -		ok = (value & mask) == (read & mask);
> -		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
> -			   i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
> -	}
> -
> -	intel_runtime_pm_put(dev_priv);
> +	seq_printf(m, "Workarounds applied: %d\n", wa->count);
> +	for (i = 0; i < wa->count; ++i)
> +		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> +			   wa->reg[i].addr, wa->reg[i].value, wa->reg[i].mask);
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2c12de678e32..91c389622217 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1308,7 +1308,7 @@ struct i915_frontbuffer_tracking {
>  };
>  
>  struct i915_wa_reg {
> -	i915_reg_t addr;
> +	u32 addr;
>  	u32 value;
>  	/* bitmask representing WA bits */
>  	u32 mask;
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index 24b929ce3341..f8bb32e974f6 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -48,29 +48,58 @@
>   * - Public functions to init or apply the given workaround type.
>   */
>  
> -static int wa_add(struct drm_i915_private *dev_priv,
> -		  i915_reg_t addr,
> -		  const u32 mask, const u32 val)
> +static void wa_add(struct drm_i915_private *i915,
> +		   i915_reg_t reg, const u32 mask, const u32 val)
>  {
> -	const unsigned int idx = dev_priv->workarounds.count;
> +	struct i915_workarounds *wa = &i915->workarounds;
> +	unsigned int start = 0, end = wa->count;
> +	unsigned int addr = i915_mmio_reg_offset(reg);
> +	struct i915_wa_reg *r;
> +
> +	while (start < end) {
> +		unsigned int mid = start + (end - start) / 2;
> +
> +		if (wa->reg[mid].addr < addr) {
> +			start = mid + 1;
> +		} else if (wa->reg[mid].addr > addr) {
> +			end = mid;
> +		} else {
> +			r = &wa->reg[mid];
> +
> +			if ((mask & ~r->mask) == 0) {
> +				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
> +					  addr, r->mask, r->value);
> +
> +				r->value &= ~mask;
> +			}
> +
> +			r->value |= val;
> +			r->mask  |= mask;
> +			return;
> +		}
> +	}
>  
> -	if (WARN_ON(idx >= I915_MAX_WA_REGS))
> -		return -ENOSPC;
> +	if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
> +		DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
> +			  addr, mask, val);
> +		return;
> +	}
>  
> -	dev_priv->workarounds.reg[idx].addr = addr;
> -	dev_priv->workarounds.reg[idx].value = val;
> -	dev_priv->workarounds.reg[idx].mask = mask;
> +	r = &wa->reg[wa->count++];
> +	r->addr  = addr;
> +	r->value = val;
> +	r->mask  = mask;
>  
> -	dev_priv->workarounds.count++;
> +	while (r-- > wa->reg) {
> +		GEM_BUG_ON(r[0].addr == r[1].addr);
> +		if (r[1].addr > r[0].addr)
> +			break;
>  
> -	return 0;
> +		swap(r[1], r[0]);
> +	}
>  }
>  
> -#define WA_REG(addr, mask, val) do { \
> -		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
> -		if (r) \
> -			return r; \
> -	} while (0)
> +#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))

On the previous thread, I noted that with this change the wa init chain
no longer needs return values.

But cleaning that up can be a followup.

Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
oscar.mateo@intel.com June 15, 2018, 4:01 p.m. UTC | #2
On 6/15/2018 1:59 AM, Chris Wilson wrote:
> For each platform, we have a few registers that rewritten with multiple
> values -- they are not part of a sequence, just different parts of a
> masked register set at different times (e.g. platform and gen
> workarounds). Consolidate these into a single register write to keep the
> table compact.
>
> While adjusting the construction of the wa table, make it non fatal so
> that the driver still loads but keeping the warning and extra details
> for inspection.

A while ago I sent a patch 
(https://patchwork.freedesktop.org/patch/205035/) that uses simple MMIO 
writes to apply ctx workarounds. This is possible since we now have 
proper golden contexts, and avoids the need for these patches.
It also has the advantage that an improperly classified WA doesn't get 
lost (we still need the classification if we want to properly validate 
the WAs, but that's a different story).
Are we sure we prefer to do this instead?

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c      | 25 ++--------
>   drivers/gpu/drm/i915/i915_drv.h          |  2 +-
>   drivers/gpu/drm/i915/intel_workarounds.c | 63 +++++++++++++++++-------
>   3 files changed, 52 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c600279d3db5..f78895ffab9b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3378,28 +3378,13 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
>   
>   static int i915_wa_registers(struct seq_file *m, void *unused)
>   {
> -	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> -	struct i915_workarounds *workarounds = &dev_priv->workarounds;
> +	struct i915_workarounds *wa = &node_to_i915(m->private)->workarounds;
>   	int i;
>   
> -	intel_runtime_pm_get(dev_priv);
> -
> -	seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
> -	for (i = 0; i < workarounds->count; ++i) {
> -		i915_reg_t addr;
> -		u32 mask, value, read;
> -		bool ok;
> -
> -		addr = workarounds->reg[i].addr;
> -		mask = workarounds->reg[i].mask;
> -		value = workarounds->reg[i].value;
> -		read = I915_READ(addr);
> -		ok = (value & mask) == (read & mask);
> -		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
> -			   i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
> -	}
> -
> -	intel_runtime_pm_put(dev_priv);
> +	seq_printf(m, "Workarounds applied: %d\n", wa->count);
> +	for (i = 0; i < wa->count; ++i)
> +		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> +			   wa->reg[i].addr, wa->reg[i].value, wa->reg[i].mask);
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2c12de678e32..91c389622217 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1308,7 +1308,7 @@ struct i915_frontbuffer_tracking {
>   };
>   
>   struct i915_wa_reg {
> -	i915_reg_t addr;
> +	u32 addr;
>   	u32 value;
>   	/* bitmask representing WA bits */
>   	u32 mask;
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index 24b929ce3341..f8bb32e974f6 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -48,29 +48,58 @@
>    * - Public functions to init or apply the given workaround type.
>    */
>   
> -static int wa_add(struct drm_i915_private *dev_priv,
> -		  i915_reg_t addr,
> -		  const u32 mask, const u32 val)
> +static void wa_add(struct drm_i915_private *i915,
> +		   i915_reg_t reg, const u32 mask, const u32 val)
>   {
> -	const unsigned int idx = dev_priv->workarounds.count;
> +	struct i915_workarounds *wa = &i915->workarounds;
> +	unsigned int start = 0, end = wa->count;
> +	unsigned int addr = i915_mmio_reg_offset(reg);
> +	struct i915_wa_reg *r;
> +
> +	while (start < end) {
> +		unsigned int mid = start + (end - start) / 2;
> +
> +		if (wa->reg[mid].addr < addr) {
> +			start = mid + 1;
> +		} else if (wa->reg[mid].addr > addr) {
> +			end = mid;
> +		} else {
> +			r = &wa->reg[mid];
> +
> +			if ((mask & ~r->mask) == 0) {
> +				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
> +					  addr, r->mask, r->value);
> +
> +				r->value &= ~mask;
> +			}
> +
> +			r->value |= val;
> +			r->mask  |= mask;
> +			return;
> +		}
> +	}
>   
> -	if (WARN_ON(idx >= I915_MAX_WA_REGS))
> -		return -ENOSPC;
> +	if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
> +		DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
> +			  addr, mask, val);
> +		return;
> +	}
>   
> -	dev_priv->workarounds.reg[idx].addr = addr;
> -	dev_priv->workarounds.reg[idx].value = val;
> -	dev_priv->workarounds.reg[idx].mask = mask;
> +	r = &wa->reg[wa->count++];
> +	r->addr  = addr;
> +	r->value = val;
> +	r->mask  = mask;
>   
> -	dev_priv->workarounds.count++;
> +	while (r-- > wa->reg) {
> +		GEM_BUG_ON(r[0].addr == r[1].addr);
> +		if (r[1].addr > r[0].addr)
> +			break;
>   
> -	return 0;
> +		swap(r[1], r[0]);
> +	}
>   }
>   
> -#define WA_REG(addr, mask, val) do { \
> -		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
> -		if (r) \
> -			return r; \
> -	} while (0)
> +#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))
>   
>   #define WA_SET_BIT_MASKED(addr, mask) \
>   	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
> @@ -540,7 +569,7 @@ int intel_ctx_workarounds_emit(struct i915_request *rq)
>   
>   	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
>   	for (i = 0; i < w->count; i++) {
> -		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
> +		*cs++ = w->reg[i].addr;
>   		*cs++ = w->reg[i].value;
>   	}
>   	*cs++ = MI_NOOP;
Chris Wilson June 15, 2018, 4:08 p.m. UTC | #3
Quoting Oscar Mateo Lozano (2018-06-15 17:01:37)
> 
> 
> On 6/15/2018 1:59 AM, Chris Wilson wrote:
> > For each platform, we have a few registers that rewritten with multiple
> > values -- they are not part of a sequence, just different parts of a
> > masked register set at different times (e.g. platform and gen
> > workarounds). Consolidate these into a single register write to keep the
> > table compact.
> >
> > While adjusting the construction of the wa table, make it non fatal so
> > that the driver still loads but keeping the warning and extra details
> > for inspection.
> 
> A while ago I sent a patch 
> (https://patchwork.freedesktop.org/patch/205035/) that uses simple MMIO 
> writes to apply ctx workarounds. This is possible since we now have 
> proper golden contexts, and avoids the need for these patches.
> It also has the advantage that an improperly classified WA doesn't get 
> lost (we still need the classification if we want to properly validate 
> the WAs, but that's a different story).
> Are we sure we prefer to do this instead?

Short attention span, I was caught up in trying to fix the overflow.

So I think I want to keep the checker here that we aren't using
conflicting workarounds, and keep the list ordered (because that helps
us when reading and checking them).

Care to respin? :)

Meanwhile, gem_workarounds is still complaining that the write here to
_3D_CHICKEN3 isn't sticking. It works locally, and I can't see anything
to explain why it wouldn't for CI.
-Chris
Ville Syrjälä June 15, 2018, 4:19 p.m. UTC | #4
On Fri, Jun 15, 2018 at 09:01:37AM -0700, Oscar Mateo Lozano wrote:
> 
> 
> On 6/15/2018 1:59 AM, Chris Wilson wrote:
> > For each platform, we have a few registers that rewritten with multiple
> > values -- they are not part of a sequence, just different parts of a
> > masked register set at different times (e.g. platform and gen
> > workarounds). Consolidate these into a single register write to keep the
> > table compact.
> >
> > While adjusting the construction of the wa table, make it non fatal so
> > that the driver still loads but keeping the warning and extra details
> > for inspection.
> 
> A while ago I sent a patch 
> (https://patchwork.freedesktop.org/patch/205035/) that uses simple MMIO 
> writes to apply ctx workarounds. This is possible since we now have 
> proper golden contexts, and avoids the need for these patches.
> It also has the advantage that an improperly classified WA doesn't get 
> lost (we still need the classification if we want to properly validate 
> the WAs, but that's a different story).
> Are we sure we prefer to do this instead?

Wouldn't that require PSMI+FSM dance to make sure execlist has
an active context when you write the regs? Can't see anything like that
in the code currently, nor is there anything in the referenced patch.
Chris Wilson June 15, 2018, 4:22 p.m. UTC | #5
Quoting Ville Syrjälä (2018-06-15 17:19:14)
> On Fri, Jun 15, 2018 at 09:01:37AM -0700, Oscar Mateo Lozano wrote:
> > 
> > 
> > On 6/15/2018 1:59 AM, Chris Wilson wrote:
> > > For each platform, we have a few registers that rewritten with multiple
> > > values -- they are not part of a sequence, just different parts of a
> > > masked register set at different times (e.g. platform and gen
> > > workarounds). Consolidate these into a single register write to keep the
> > > table compact.
> > >
> > > While adjusting the construction of the wa table, make it non fatal so
> > > that the driver still loads but keeping the warning and extra details
> > > for inspection.
> > 
> > A while ago I sent a patch 
> > (https://patchwork.freedesktop.org/patch/205035/) that uses simple MMIO 
> > writes to apply ctx workarounds. This is possible since we now have 
> > proper golden contexts, and avoids the need for these patches.
> > It also has the advantage that an improperly classified WA doesn't get 
> > lost (we still need the classification if we want to properly validate 
> > the WAs, but that's a different story).
> > Are we sure we prefer to do this instead?
> 
> Wouldn't that require PSMI+FSM dance to make sure execlist has
> an active context when you write the regs? Can't see anything like that
> in the code currently, nor is there anything in the referenced patch.

We keep forcewake asserted across the bringup, from before we load the
default context until after we have saved the context image. These mmio
writes should be saved along with the image. That's the theory at least.
-Chris
Ville Syrjälä June 15, 2018, 4:37 p.m. UTC | #6
On Fri, Jun 15, 2018 at 05:22:40PM +0100, Chris Wilson wrote:
> Quoting Ville Syrjälä (2018-06-15 17:19:14)
> > On Fri, Jun 15, 2018 at 09:01:37AM -0700, Oscar Mateo Lozano wrote:
> > > 
> > > 
> > > On 6/15/2018 1:59 AM, Chris Wilson wrote:
> > > > For each platform, we have a few registers that rewritten with multiple
> > > > values -- they are not part of a sequence, just different parts of a
> > > > masked register set at different times (e.g. platform and gen
> > > > workarounds). Consolidate these into a single register write to keep the
> > > > table compact.
> > > >
> > > > While adjusting the construction of the wa table, make it non fatal so
> > > > that the driver still loads but keeping the warning and extra details
> > > > for inspection.
> > > 
> > > A while ago I sent a patch 
> > > (https://patchwork.freedesktop.org/patch/205035/) that uses simple MMIO 
> > > writes to apply ctx workarounds. This is possible since we now have 
> > > proper golden contexts, and avoids the need for these patches.
> > > It also has the advantage that an improperly classified WA doesn't get 
> > > lost (we still need the classification if we want to properly validate 
> > > the WAs, but that's a different story).
> > > Are we sure we prefer to do this instead?
> > 
> > Wouldn't that require PSMI+FSM dance to make sure execlist has
> > an active context when you write the regs? Can't see anything like that
> > in the code currently, nor is there anything in the referenced patch.
> 
> We keep forcewake asserted across the bringup, from before we load the
> default context until after we have saved the context image. These mmio
> writes should be saved along with the image. That's the theory at least.

Force wake isn't quite enough from what I understand. Or maybe it was
that there is at least some delay between forcewake ack asserting and
the context actually having been loaded (just my recollection of what
Mika's experiments once showed).

The spec at least still lists the extra dance for MMIO access into
context saved registers.
Chris Wilson June 15, 2018, 4:43 p.m. UTC | #7
Quoting Ville Syrjälä (2018-06-15 17:37:44)
> On Fri, Jun 15, 2018 at 05:22:40PM +0100, Chris Wilson wrote:
> > Quoting Ville Syrjälä (2018-06-15 17:19:14)
> > > On Fri, Jun 15, 2018 at 09:01:37AM -0700, Oscar Mateo Lozano wrote:
> > > > 
> > > > 
> > > > On 6/15/2018 1:59 AM, Chris Wilson wrote:
> > > > > For each platform, we have a few registers that rewritten with multiple
> > > > > values -- they are not part of a sequence, just different parts of a
> > > > > masked register set at different times (e.g. platform and gen
> > > > > workarounds). Consolidate these into a single register write to keep the
> > > > > table compact.
> > > > >
> > > > > While adjusting the construction of the wa table, make it non fatal so
> > > > > that the driver still loads but keeping the warning and extra details
> > > > > for inspection.
> > > > 
> > > > A while ago I sent a patch 
> > > > (https://patchwork.freedesktop.org/patch/205035/) that uses simple MMIO 
> > > > writes to apply ctx workarounds. This is possible since we now have 
> > > > proper golden contexts, and avoids the need for these patches.
> > > > It also has the advantage that an improperly classified WA doesn't get 
> > > > lost (we still need the classification if we want to properly validate 
> > > > the WAs, but that's a different story).
> > > > Are we sure we prefer to do this instead?
> > > 
> > > Wouldn't that require PSMI+FSM dance to make sure execlist has
> > > an active context when you write the regs? Can't see anything like that
> > > in the code currently, nor is there anything in the referenced patch.
> > 
> > We keep forcewake asserted across the bringup, from before we load the
> > default context until after we have saved the context image. These mmio
> > writes should be saved along with the image. That's the theory at least.
> 
> Force wake isn't quite enough from what I understand. Or maybe it was
> that there is at least some delay between forcewake ack asserting and
> the context actually having been loaded (just my recollection of what
> Mika's experiments once showed).
> 
> The spec at least still lists the extra dance for MMIO access into
> context saved registers.

We do at least have the advantage now of checking that all contexts start
with the expected w/a register state. Hmm, if we stuff those registers
with garbage inside gem_workarounds, that will help to confirm that we
do load them correctly. (If having them scrubbed by reset isn't enough!)
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c600279d3db5..f78895ffab9b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3378,28 +3378,13 @@  static int i915_shared_dplls_info(struct seq_file *m, void *unused)
 
 static int i915_wa_registers(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct i915_workarounds *workarounds = &dev_priv->workarounds;
+	struct i915_workarounds *wa = &node_to_i915(m->private)->workarounds;
 	int i;
 
-	intel_runtime_pm_get(dev_priv);
-
-	seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
-	for (i = 0; i < workarounds->count; ++i) {
-		i915_reg_t addr;
-		u32 mask, value, read;
-		bool ok;
-
-		addr = workarounds->reg[i].addr;
-		mask = workarounds->reg[i].mask;
-		value = workarounds->reg[i].value;
-		read = I915_READ(addr);
-		ok = (value & mask) == (read & mask);
-		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
-			   i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
-	}
-
-	intel_runtime_pm_put(dev_priv);
+	seq_printf(m, "Workarounds applied: %d\n", wa->count);
+	for (i = 0; i < wa->count; ++i)
+		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
+			   wa->reg[i].addr, wa->reg[i].value, wa->reg[i].mask);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2c12de678e32..91c389622217 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1308,7 +1308,7 @@  struct i915_frontbuffer_tracking {
 };
 
 struct i915_wa_reg {
-	i915_reg_t addr;
+	u32 addr;
 	u32 value;
 	/* bitmask representing WA bits */
 	u32 mask;
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index 24b929ce3341..f8bb32e974f6 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -48,29 +48,58 @@ 
  * - Public functions to init or apply the given workaround type.
  */
 
-static int wa_add(struct drm_i915_private *dev_priv,
-		  i915_reg_t addr,
-		  const u32 mask, const u32 val)
+static void wa_add(struct drm_i915_private *i915,
+		   i915_reg_t reg, const u32 mask, const u32 val)
 {
-	const unsigned int idx = dev_priv->workarounds.count;
+	struct i915_workarounds *wa = &i915->workarounds;
+	unsigned int start = 0, end = wa->count;
+	unsigned int addr = i915_mmio_reg_offset(reg);
+	struct i915_wa_reg *r;
+
+	while (start < end) {
+		unsigned int mid = start + (end - start) / 2;
+
+		if (wa->reg[mid].addr < addr) {
+			start = mid + 1;
+		} else if (wa->reg[mid].addr > addr) {
+			end = mid;
+		} else {
+			r = &wa->reg[mid];
+
+			if ((mask & ~r->mask) == 0) {
+				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
+					  addr, r->mask, r->value);
+
+				r->value &= ~mask;
+			}
+
+			r->value |= val;
+			r->mask  |= mask;
+			return;
+		}
+	}
 
-	if (WARN_ON(idx >= I915_MAX_WA_REGS))
-		return -ENOSPC;
+	if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
+		DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
+			  addr, mask, val);
+		return;
+	}
 
-	dev_priv->workarounds.reg[idx].addr = addr;
-	dev_priv->workarounds.reg[idx].value = val;
-	dev_priv->workarounds.reg[idx].mask = mask;
+	r = &wa->reg[wa->count++];
+	r->addr  = addr;
+	r->value = val;
+	r->mask  = mask;
 
-	dev_priv->workarounds.count++;
+	while (r-- > wa->reg) {
+		GEM_BUG_ON(r[0].addr == r[1].addr);
+		if (r[1].addr > r[0].addr)
+			break;
 
-	return 0;
+		swap(r[1], r[0]);
+	}
 }
 
-#define WA_REG(addr, mask, val) do { \
-		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
-		if (r) \
-			return r; \
-	} while (0)
+#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))
 
 #define WA_SET_BIT_MASKED(addr, mask) \
 	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
@@ -540,7 +569,7 @@  int intel_ctx_workarounds_emit(struct i915_request *rq)
 
 	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
 	for (i = 0; i < w->count; i++) {
-		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
+		*cs++ = w->reg[i].addr;
 		*cs++ = w->reg[i].value;
 	}
 	*cs++ = MI_NOOP;