[1/2] drm/i915: Keep the ctx workarounds tightly packed

Message ID	20180615080045.6671-1-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Fri, 15 Jun 2018 09:00:44 +0100 Message-Id: <20180615080045.6671-1-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH 1/2] drm/i915: Keep the ctx workarounds tightly packed Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Message ID

20180615080045.6671-1-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Fri, 15 Jun 2018 09:00:44 +0100
Message-Id: <20180615080045.6671-1-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 1/2] drm/i915: Keep the ctx workarounds tightly
	packed
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Commit Message

Chris Wilson June 15, 2018, 8 a.m. UTC

For each platform, we have a few registers that rewritten with multiple
values -- they are not part of a sequence, just different parts of a
masked register set at different times (e.g. platform and gen
workarounds). Consolidate these into a single register write to keep the
table compact.

While adjusting the construction of the wa table, make it non fatal so
that the driver still loads but keeping the warning and extra details
for inspection.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      | 25 ++--------
 drivers/gpu/drm/i915/i915_drv.h          |  2 +-
 drivers/gpu/drm/i915/intel_workarounds.c | 63 +++++++++++++++++-------
 3 files changed, 52 insertions(+), 38 deletions(-)

Comments

Mika Kuoppala June 15, 2018, 11:29 a.m. UTC | #1

Chris Wilson <chris@chris-wilson.co.uk> writes:

> For each platform, we have a few registers that rewritten with multiple
> values -- they are not part of a sequence, just different parts of a
> masked register set at different times (e.g. platform and gen
> workarounds). Consolidate these into a single register write to keep the
> table compact.
>
> While adjusting the construction of the wa table, make it non fatal so
> that the driver still loads but keeping the warning and extra details
> for inspection.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      | 25 ++--------
>  drivers/gpu/drm/i915/i915_drv.h          |  2 +-
>  drivers/gpu/drm/i915/intel_workarounds.c | 63 +++++++++++++++++-------
>  3 files changed, 52 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c600279d3db5..f78895ffab9b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3378,28 +3378,13 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
>  
>  static int i915_wa_registers(struct seq_file *m, void *unused)
>  {
> -	struct drm_i915_private *dev_priv = node_to_i915(m->private);
> -	struct i915_workarounds *workarounds = &dev_priv->workarounds;
> +	struct i915_workarounds *wa = &node_to_i915(m->private)->workarounds;
>  	int i;
>  
> -	intel_runtime_pm_get(dev_priv);
> -
> -	seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
> -	for (i = 0; i < workarounds->count; ++i) {
> -		i915_reg_t addr;
> -		u32 mask, value, read;
> -		bool ok;
> -
> -		addr = workarounds->reg[i].addr;
> -		mask = workarounds->reg[i].mask;
> -		value = workarounds->reg[i].value;
> -		read = I915_READ(addr);
> -		ok = (value & mask) == (read & mask);
> -		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
> -			   i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
> -	}
> -
> -	intel_runtime_pm_put(dev_priv);
> +	seq_printf(m, "Workarounds applied: %d\n", wa->count);
> +	for (i = 0; i < wa->count; ++i)
> +		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> +			   wa->reg[i].addr, wa->reg[i].value, wa->reg[i].mask);

It seems that gem_workarounds is already smart enough to not
parse/trust the driver provided read value.

Perhaps the only value in here was that we saw what
were context and non context saved ones. But we
have other tools for that.

>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2c12de678e32..91c389622217 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1308,7 +1308,7 @@ struct i915_frontbuffer_tracking {
>  };
>  
>  struct i915_wa_reg {
> -	i915_reg_t addr;
> +	u32 addr;
>  	u32 value;
>  	/* bitmask representing WA bits */
>  	u32 mask;
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> index 24b929ce3341..f8bb32e974f6 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -48,29 +48,58 @@
>   * - Public functions to init or apply the given workaround type.
>   */
>  
> -static int wa_add(struct drm_i915_private *dev_priv,
> -		  i915_reg_t addr,
> -		  const u32 mask, const u32 val)
> +static void wa_add(struct drm_i915_private *i915,
> +		   i915_reg_t reg, const u32 mask, const u32 val)
>  {
> -	const unsigned int idx = dev_priv->workarounds.count;
> +	struct i915_workarounds *wa = &i915->workarounds;
> +	unsigned int start = 0, end = wa->count;
> +	unsigned int addr = i915_mmio_reg_offset(reg);
> +	struct i915_wa_reg *r;
> +
> +	while (start < end) {
> +		unsigned int mid = start + (end - start) / 2;
> +
> +		if (wa->reg[mid].addr < addr) {
> +			start = mid + 1;
> +		} else if (wa->reg[mid].addr > addr) {
> +			end = mid;
> +		} else {
> +			r = &wa->reg[mid];
> +
> +			if ((mask & ~r->mask) == 0) {
> +				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
> +					  addr, r->mask, r->value);
> +
> +				r->value &= ~mask;
> +			}
> +
> +			r->value |= val;
> +			r->mask  |= mask;
> +			return;
> +		}
> +	}
>  
> -	if (WARN_ON(idx >= I915_MAX_WA_REGS))
> -		return -ENOSPC;
> +	if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
> +		DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
> +			  addr, mask, val);
> +		return;
> +	}
>  
> -	dev_priv->workarounds.reg[idx].addr = addr;
> -	dev_priv->workarounds.reg[idx].value = val;
> -	dev_priv->workarounds.reg[idx].mask = mask;
> +	r = &wa->reg[wa->count++];
> +	r->addr  = addr;
> +	r->value = val;
> +	r->mask  = mask;
>  
> -	dev_priv->workarounds.count++;
> +	while (r-- > wa->reg) {
> +		GEM_BUG_ON(r[0].addr == r[1].addr);
> +		if (r[1].addr > r[0].addr)
> +			break;

Keeping the workarounds in a sorted order by addr,
could be mentioned in the header. But not insisting
on that as this is rather clear from the code.

>  
> -	return 0;
> +		swap(r[1], r[0]);
> +	}
>  }
>  
> -#define WA_REG(addr, mask, val) do { \
> -		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
> -		if (r) \
> -			return r; \
> -	} while (0)
> +#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))

If you do this, then the whole chain of initing workarounds
can be converted to omitting return value.
-Mika

>  
>  #define WA_SET_BIT_MASKED(addr, mask) \
>  	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
> @@ -540,7 +569,7 @@ int intel_ctx_workarounds_emit(struct i915_request *rq)
>  
>  	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
>  	for (i = 0; i < w->count; i++) {
> -		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
> +		*cs++ = w->reg[i].addr;
>  		*cs++ = w->reg[i].value;
>  	}
>  	*cs++ = MI_NOOP;
> -- 
> 2.17.1

Chris Wilson June 15, 2018, 11:34 a.m. UTC | #2

Quoting Mika Kuoppala (2018-06-15 12:29:23)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > For each platform, we have a few registers that rewritten with multiple
> > values -- they are not part of a sequence, just different parts of a
> > masked register set at different times (e.g. platform and gen
> > workarounds). Consolidate these into a single register write to keep the
> > table compact.
> >
> > While adjusting the construction of the wa table, make it non fatal so
> > that the driver still loads but keeping the warning and extra details
> > for inspection.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Oscar Mateo <oscar.mateo@intel.com>
> > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_debugfs.c      | 25 ++--------
> >  drivers/gpu/drm/i915/i915_drv.h          |  2 +-
> >  drivers/gpu/drm/i915/intel_workarounds.c | 63 +++++++++++++++++-------
> >  3 files changed, 52 insertions(+), 38 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index c600279d3db5..f78895ffab9b 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -3378,28 +3378,13 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
> >  
> >  static int i915_wa_registers(struct seq_file *m, void *unused)
> >  {
> > -     struct drm_i915_private *dev_priv = node_to_i915(m->private);
> > -     struct i915_workarounds *workarounds = &dev_priv->workarounds;
> > +     struct i915_workarounds *wa = &node_to_i915(m->private)->workarounds;
> >       int i;
> >  
> > -     intel_runtime_pm_get(dev_priv);
> > -
> > -     seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
> > -     for (i = 0; i < workarounds->count; ++i) {
> > -             i915_reg_t addr;
> > -             u32 mask, value, read;
> > -             bool ok;
> > -
> > -             addr = workarounds->reg[i].addr;
> > -             mask = workarounds->reg[i].mask;
> > -             value = workarounds->reg[i].value;
> > -             read = I915_READ(addr);
> > -             ok = (value & mask) == (read & mask);
> > -             seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
> > -                        i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
> > -     }
> > -
> > -     intel_runtime_pm_put(dev_priv);
> > +     seq_printf(m, "Workarounds applied: %d\n", wa->count);
> > +     for (i = 0; i < wa->count; ++i)
> > +             seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
> > +                        wa->reg[i].addr, wa->reg[i].value, wa->reg[i].mask);
> 
> It seems that gem_workarounds is already smart enough to not
> parse/trust the driver provided read value.
> 
> Perhaps the only value in here was that we saw what
> were context and non context saved ones. But we
> have other tools for that.

We couldn't even rely on it doing that, since what it read would be
random (we may have had a context, the power context may or may not have
the same values, we may never had loaded the values)...

Fortunately, we stopped using them :)

> >       return 0;
> >  }
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 2c12de678e32..91c389622217 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1308,7 +1308,7 @@ struct i915_frontbuffer_tracking {
> >  };
> >  
> >  struct i915_wa_reg {
> > -     i915_reg_t addr;
> > +     u32 addr;
> >       u32 value;
> >       /* bitmask representing WA bits */
> >       u32 mask;
> > diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
> > index 24b929ce3341..f8bb32e974f6 100644
> > --- a/drivers/gpu/drm/i915/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> > @@ -48,29 +48,58 @@
> >   * - Public functions to init or apply the given workaround type.
> >   */
> >  
> > -static int wa_add(struct drm_i915_private *dev_priv,
> > -               i915_reg_t addr,
> > -               const u32 mask, const u32 val)
> > +static void wa_add(struct drm_i915_private *i915,
> > +                i915_reg_t reg, const u32 mask, const u32 val)
> >  {
> > -     const unsigned int idx = dev_priv->workarounds.count;
> > +     struct i915_workarounds *wa = &i915->workarounds;
> > +     unsigned int start = 0, end = wa->count;
> > +     unsigned int addr = i915_mmio_reg_offset(reg);
> > +     struct i915_wa_reg *r;
> > +
> > +     while (start < end) {
> > +             unsigned int mid = start + (end - start) / 2;
> > +
> > +             if (wa->reg[mid].addr < addr) {
> > +                     start = mid + 1;
> > +             } else if (wa->reg[mid].addr > addr) {
> > +                     end = mid;
> > +             } else {
> > +                     r = &wa->reg[mid];
> > +
> > +                     if ((mask & ~r->mask) == 0) {
> > +                             DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
> > +                                       addr, r->mask, r->value);
> > +
> > +                             r->value &= ~mask;
> > +                     }
> > +
> > +                     r->value |= val;
> > +                     r->mask  |= mask;
> > +                     return;
> > +             }
> > +     }
> >  
> > -     if (WARN_ON(idx >= I915_MAX_WA_REGS))
> > -             return -ENOSPC;
> > +     if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
> > +             DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
> > +                       addr, mask, val);
> > +             return;
> > +     }
> >  
> > -     dev_priv->workarounds.reg[idx].addr = addr;
> > -     dev_priv->workarounds.reg[idx].value = val;
> > -     dev_priv->workarounds.reg[idx].mask = mask;
> > +     r = &wa->reg[wa->count++];
> > +     r->addr  = addr;
> > +     r->value = val;
> > +     r->mask  = mask;
> >  
> > -     dev_priv->workarounds.count++;
> > +     while (r-- > wa->reg) {
> > +             GEM_BUG_ON(r[0].addr == r[1].addr);
> > +             if (r[1].addr > r[0].addr)
> > +                     break;
> 
> Keeping the workarounds in a sorted order by addr,
> could be mentioned in the header. But not insisting
> on that as this is rather clear from the code.
> 
> >  
> > -     return 0;
> > +             swap(r[1], r[0]);
> > +     }
> >  }
> >  
> > -#define WA_REG(addr, mask, val) do { \
> > -             const int r = wa_add(dev_priv, (addr), (mask), (val)); \
> > -             if (r) \
> > -                     return r; \
> > -     } while (0)
> > +#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))
> 
> If you do this, then the whole chain of initing workarounds
> can be converted to omitting return value.

I was thinking the same for a followup. As well as not storing the
workarounds but computing them on the fly -- for the normal case, we
only need them once for the initial context image.
-Chris

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c600279d3db5..f78895ffab9b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3378,28 +3378,13 @@  static int i915_shared_dplls_info(struct seq_file *m, void *unused)
 
 static int i915_wa_registers(struct seq_file *m, void *unused)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct i915_workarounds *workarounds = &dev_priv->workarounds;
+	struct i915_workarounds *wa = &node_to_i915(m->private)->workarounds;
 	int i;
 
-	intel_runtime_pm_get(dev_priv);
-
-	seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
-	for (i = 0; i < workarounds->count; ++i) {
-		i915_reg_t addr;
-		u32 mask, value, read;
-		bool ok;
-
-		addr = workarounds->reg[i].addr;
-		mask = workarounds->reg[i].mask;
-		value = workarounds->reg[i].value;
-		read = I915_READ(addr);
-		ok = (value & mask) == (read & mask);
-		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X, read: 0x%08x, status: %s\n",
-			   i915_mmio_reg_offset(addr), value, mask, read, ok ? "OK" : "FAIL");
-	}
-
-	intel_runtime_pm_put(dev_priv);
+	seq_printf(m, "Workarounds applied: %d\n", wa->count);
+	for (i = 0; i < wa->count; ++i)
+		seq_printf(m, "0x%X: 0x%08X, mask: 0x%08X\n",
+			   wa->reg[i].addr, wa->reg[i].value, wa->reg[i].mask);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2c12de678e32..91c389622217 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1308,7 +1308,7 @@  struct i915_frontbuffer_tracking {
 };
 
 struct i915_wa_reg {
-	i915_reg_t addr;
+	u32 addr;
 	u32 value;
 	/* bitmask representing WA bits */
 	u32 mask;
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index 24b929ce3341..f8bb32e974f6 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -48,29 +48,58 @@ 
  * - Public functions to init or apply the given workaround type.
  */
 
-static int wa_add(struct drm_i915_private *dev_priv,
-		  i915_reg_t addr,
-		  const u32 mask, const u32 val)
+static void wa_add(struct drm_i915_private *i915,
+		   i915_reg_t reg, const u32 mask, const u32 val)
 {
-	const unsigned int idx = dev_priv->workarounds.count;
+	struct i915_workarounds *wa = &i915->workarounds;
+	unsigned int start = 0, end = wa->count;
+	unsigned int addr = i915_mmio_reg_offset(reg);
+	struct i915_wa_reg *r;
+
+	while (start < end) {
+		unsigned int mid = start + (end - start) / 2;
+
+		if (wa->reg[mid].addr < addr) {
+			start = mid + 1;
+		} else if (wa->reg[mid].addr > addr) {
+			end = mid;
+		} else {
+			r = &wa->reg[mid];
+
+			if ((mask & ~r->mask) == 0) {
+				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
+					  addr, r->mask, r->value);
+
+				r->value &= ~mask;
+			}
+
+			r->value |= val;
+			r->mask  |= mask;
+			return;
+		}
+	}
 
-	if (WARN_ON(idx >= I915_MAX_WA_REGS))
-		return -ENOSPC;
+	if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
+		DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
+			  addr, mask, val);
+		return;
+	}
 
-	dev_priv->workarounds.reg[idx].addr = addr;
-	dev_priv->workarounds.reg[idx].value = val;
-	dev_priv->workarounds.reg[idx].mask = mask;
+	r = &wa->reg[wa->count++];
+	r->addr  = addr;
+	r->value = val;
+	r->mask  = mask;
 
-	dev_priv->workarounds.count++;
+	while (r-- > wa->reg) {
+		GEM_BUG_ON(r[0].addr == r[1].addr);
+		if (r[1].addr > r[0].addr)
+			break;
 
-	return 0;
+		swap(r[1], r[0]);
+	}
 }
 
-#define WA_REG(addr, mask, val) do { \
-		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
-		if (r) \
-			return r; \
-	} while (0)
+#define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))
 
 #define WA_SET_BIT_MASKED(addr, mask) \
 	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
@@ -540,7 +569,7 @@  int intel_ctx_workarounds_emit(struct i915_request *rq)
 
 	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
 	for (i = 0; i < w->count; i++) {
-		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
+		*cs++ = w->reg[i].addr;
 		*cs++ = w->reg[i].value;
 	}
 	*cs++ = MI_NOOP;

[1/2] drm/i915: Keep the ctx workarounds tightly packed

Commit Message

Comments

Patch