diff mbox

[v2] drm/i915: add WaIncreaseDefaultTLBEntries

Message ID 1453911122-32477-1-git-send-email-tim.gore@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

tim.gore@intel.com Jan. 27, 2016, 4:12 p.m. UTC
From: Tim Gore <tim.gore@intel.com>

WaIncreaseDefaultTLBEntries increases the number of TLB
entries available for GPGPU workloads and gives significant
( > 10% ) performance gain for some OCL benchmarks.
Put this in a new function that can be a place for
workarounds that are GT related but not required per ring.
This function is called on driver load and also after a
reset and on resume, so it is safe for workarounds that get
clobbered in these situations. This function currently has
just this one workaround.

v2: This was originally split into 3 patches but following
  review feedback was squashed into 1.
  I have not incorporated some style comments from Chris
  Wilson as I felt that after defining and intialising a
  temporary variable and then adding an additional if block
  to only write the register if the temporary variable had
  been set, this didn't really give a net gain.

Signed-off-by: Tim Gore <tim.gore@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 21 +++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h     |  7 +++++++
 2 files changed, 28 insertions(+)

Comments

Mika Kuoppala Jan. 29, 2016, 8:54 a.m. UTC | #1
tim.gore@intel.com writes:

> From: Tim Gore <tim.gore@intel.com>
>
> WaIncreaseDefaultTLBEntries increases the number of TLB
> entries available for GPGPU workloads and gives significant
> ( > 10% ) performance gain for some OCL benchmarks.
> Put this in a new function that can be a place for
> workarounds that are GT related but not required per ring.
> This function is called on driver load and also after a
> reset and on resume, so it is safe for workarounds that get
> clobbered in these situations. This function currently has
> just this one workaround.
>
> v2: This was originally split into 3 patches but following
>   review feedback was squashed into 1.
>   I have not incorporated some style comments from Chris
>   Wilson as I felt that after defining and intialising a
>   temporary variable and then adding an additional if block
>   to only write the register if the temporary variable had
>   been set, this didn't really give a net gain.
>
> Signed-off-by: Tim Gore <tim.gore@intel.com>

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 21 +++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_reg.h     |  7 +++++++
>  2 files changed, 28 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7377b67..5cd5f8b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2132,6 +2132,25 @@ static void i915_address_space_init(struct i915_address_space *vm,
>  	list_add_tail(&vm->global_link, &dev_priv->vm_list);
>  }
>  
> +static void gtt_write_workarounds(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	/* This function is for gtt related workarounds. This function is
> +	 * called on driver load and after a GPU reset, so you can place
> +	 * workarounds here even if they get overwritten by GPU reset.
> +	 */
> +	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
> +	if (IS_BROADWELL(dev))
> +		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
> +	else if (IS_CHERRYVIEW(dev))
> +		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
> +	else if (IS_SKYLAKE(dev))
> +		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
> +	else if (IS_BROXTON(dev))
> +		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
> +}
> +
>  int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2148,6 +2167,8 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
>  
>  int i915_ppgtt_init_hw(struct drm_device *dev)
>  {
> +	gtt_write_workarounds(dev);
> +
>  	/* In the case of execlists, PPGTT is enabled by the context descriptor
>  	 * and the PDPs are contained within the context itself.  We don't
>  	 * need to do anything here. */
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 65e32a3..0ab6312 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8170,4 +8170,11 @@ enum skl_disp_power_wells {
>  #define GEN9_VEBOX_MOCS(i)	_MMIO(0xcb00 + (i) * 4)	/* Video MOCS registers */
>  #define GEN9_BLT_MOCS(i)	_MMIO(0xcc00 + (i) * 4)	/* Blitter MOCS registers */
>  
> +/* gamt regs */
> +#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
> +#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW  0x67F1427F /* max/min for LRA1/2 */
> +#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV  0x5FF101FF /* max/min for LRA1/2 */
> +#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL  0x67F1427F /*    "        " */
> +#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT  0x5FF101FF /*    "        " */
> +
>  #endif /* _I915_REG_H_ */
> -- 
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7377b67..5cd5f8b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2132,6 +2132,25 @@  static void i915_address_space_init(struct i915_address_space *vm,
 	list_add_tail(&vm->global_link, &dev_priv->vm_list);
 }
 
+static void gtt_write_workarounds(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* This function is for gtt related workarounds. This function is
+	 * called on driver load and after a GPU reset, so you can place
+	 * workarounds here even if they get overwritten by GPU reset.
+	 */
+	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
+	if (IS_BROADWELL(dev))
+		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
+	else if (IS_CHERRYVIEW(dev))
+		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
+	else if (IS_SKYLAKE(dev))
+		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
+	else if (IS_BROXTON(dev))
+		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
+}
+
 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2148,6 +2167,8 @@  int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 
 int i915_ppgtt_init_hw(struct drm_device *dev)
 {
+	gtt_write_workarounds(dev);
+
 	/* In the case of execlists, PPGTT is enabled by the context descriptor
 	 * and the PDPs are contained within the context itself.  We don't
 	 * need to do anything here. */
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 65e32a3..0ab6312 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8170,4 +8170,11 @@  enum skl_disp_power_wells {
 #define GEN9_VEBOX_MOCS(i)	_MMIO(0xcb00 + (i) * 4)	/* Video MOCS registers */
 #define GEN9_BLT_MOCS(i)	_MMIO(0xcc00 + (i) * 4)	/* Blitter MOCS registers */
 
+/* gamt regs */
+#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
+#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW  0x67F1427F /* max/min for LRA1/2 */
+#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV  0x5FF101FF /* max/min for LRA1/2 */
+#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL  0x67F1427F /*    "        " */
+#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT  0x5FF101FF /*    "        " */
+
 #endif /* _I915_REG_H_ */