diff mbox

[21/62] drm/i915/bdw: Support BDW caching

Message ID 1383451680-11173-22-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Nov. 3, 2013, 4:07 a.m. UTC
BDW caching works differently from previous generations. Instead of
having bits in the PTE which directly control how the page is cached,
the 3 PTE bits PWT, PCD and PAT provide an index into a PAT defined by
register 0x40e0. This style of caching is functionally equivalent to how
it works on HSW and before.

v2: Tiny bikeshed as discussed on internal irc.

v3: Squash in patch from Ville to make the PAT setup mirror the x86 one
in arch/x86/mm/pat.c more closely. Primarily, index 0 will be WB rather
than uncached.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 42 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h     |  1 +
 2 files changed, 43 insertions(+)

Comments

Chris Wilson Nov. 4, 2013, 2:39 p.m. UTC | #1
On Sat, Nov 02, 2013 at 09:07:19PM -0700, Ben Widawsky wrote:
> +static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
> +{
> +#define GEN8_PPAT_UC		(0<<0)
> +#define GEN8_PPAT_WC		(1<<0)
> +#define GEN8_PPAT_WT		(2<<0)
> +#define GEN8_PPAT_WB		(3<<0)
> +#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
> +#define GEN8_PPAT_LLC		(1<<2)
> +#define GEN8_PPAT_LLCELLC	(2<<2)
> +#define GEN8_PPAT_LLCeLLC	(3<<2) /* BSPEC mistake? */
> +#define GEN8_PPAT_AGE(x)	(x<<4)
> +#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
> +	uint64_t pat;
> +
> +	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
> +	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
> +	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
> +	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
> +	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
> +	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
> +	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
> +	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
> +
> +	I915_WRITE(GEN8_PRIVATE_PAT, pat);
> +	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);

Use I915_WRITE64() here, or add a scary-ass comment explaining why we cannot.
-Chris
Imre Deak Nov. 5, 2013, 3:19 p.m. UTC | #2
On Sat, 2013-11-02 at 21:07 -0700, Ben Widawsky wrote:
> BDW caching works differently than the previous generations. Instead of
> having bits in the PTE which directly control how the page is cached,
> the 3 PTE bits PWT PCD and PAT provide an index into a PAT defined by
> register 0x40e0. This style of caching is functionally equivalent to how
> it works on HSW and before.
> 
> v2: Tiny bikeshed as discussed on internal irc.
> 
> v3: Squash in patch from Ville to mirror the x86 PAT setup more like
> in arch/x86/mm/pat.c. Primarily, the 0th index will be WB, and not
> uncached.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 42 +++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_reg.h     |  1 +
>  2 files changed, 43 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index df992dc..02de12d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -58,12 +58,21 @@ typedef uint64_t gen8_gtt_pte_t;
>  #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
>  #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
>  
> +#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> +#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> +#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> +#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> +
>  static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
>  					     enum i915_cache_level level,
>  					     bool valid)
>  {
>  	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
>  	pte |= addr;
> +	if (level != I915_CACHE_NONE)
> +		pte |= PPAT_CACHED_INDEX;
> +	else
> +		pte |= PPAT_UNCACHED_INDEX;
>  	return pte;
>  }
>  
> @@ -805,6 +814,7 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node,
>  			*end -= 4096;
>  	}
>  }
> +
>  void i915_gem_setup_global_gtt(struct drm_device *dev,
>  			       unsigned long start,
>  			       unsigned long mappable_end,
> @@ -1002,6 +1012,36 @@ static int ggtt_probe_common(struct drm_device *dev,
>  	return ret;
>  }
>  
> +/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
> + * bits. When using advanced contexts each context stores its own PAT, but
> + * writing this data shouldn't be harmful even in those cases. */
> +static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
> +{
> +#define GEN8_PPAT_UC		(0<<0)
> +#define GEN8_PPAT_WC		(1<<0)
> +#define GEN8_PPAT_WT		(2<<0)
> +#define GEN8_PPAT_WB		(3<<0)
> +#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
> +#define GEN8_PPAT_LLC		(1<<2)
> +#define GEN8_PPAT_LLCELLC	(2<<2)
> +#define GEN8_PPAT_LLCeLLC	(3<<2) /* BSPEC mistake? */

The LLC and LLCELLC encodings don't match the bspec either. If the values
above are correct, it would be nice to have a comment after those
too. Otherwise looks OK:

Reviewed-by: Imre Deak <imre.deak@intel.com>

> +#define GEN8_PPAT_AGE(x)	(x<<4)
> +#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
> +	uint64_t pat;
> +
> +	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
> +	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
> +	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
> +	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
> +	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
> +	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
> +	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
> +	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
> +
> +	I915_WRITE(GEN8_PRIVATE_PAT, pat);
> +	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
> +}
> +
>  static int gen8_gmch_probe(struct drm_device *dev,
>  			   size_t *gtt_total,
>  			   size_t *stolen,
> @@ -1027,6 +1067,8 @@ static int gen8_gmch_probe(struct drm_device *dev,
>  	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
>  	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
>  
> +	gen8_setup_private_ppat(dev_priv);
> +
>  	ret = ggtt_probe_common(dev, gtt_size);
>  
>  	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index b801b88..9929750 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -664,6 +664,7 @@
>  #define   RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3)
>  #define   RING_FAULT_VALID	(1<<0)
>  #define DONE_REG		0x40b0
> +#define GEN8_PRIVATE_PAT	0x40e0
>  #define BSD_HWS_PGA_GEN7	(0x04180)
>  #define BLT_HWS_PGA_GEN7	(0x04280)
>  #define VEBOX_HWS_PGA_GEN7	(0x04380)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index df992dc..02de12d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -58,12 +58,21 @@  typedef uint64_t gen8_gtt_pte_t;
 #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
 #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
 
+#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
+#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
+
 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
 					     enum i915_cache_level level,
 					     bool valid)
 {
 	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
 	pte |= addr;
+	if (level != I915_CACHE_NONE)
+		pte |= PPAT_CACHED_INDEX;
+	else
+		pte |= PPAT_UNCACHED_INDEX;
 	return pte;
 }
 
@@ -805,6 +814,7 @@  static void i915_gtt_color_adjust(struct drm_mm_node *node,
 			*end -= 4096;
 	}
 }
+
 void i915_gem_setup_global_gtt(struct drm_device *dev,
 			       unsigned long start,
 			       unsigned long mappable_end,
@@ -1002,6 +1012,36 @@  static int ggtt_probe_common(struct drm_device *dev,
 	return ret;
 }
 
+/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
+ * bits. When using advanced contexts each context stores its own PAT, but
+ * writing this data shouldn't be harmful even in those cases. */
+static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
+{
+#define GEN8_PPAT_UC		(0<<0)
+#define GEN8_PPAT_WC		(1<<0)
+#define GEN8_PPAT_WT		(2<<0)
+#define GEN8_PPAT_WB		(3<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
+#define GEN8_PPAT_LLC		(1<<2)
+#define GEN8_PPAT_LLCELLC	(2<<2)
+#define GEN8_PPAT_LLCeLLC	(3<<2) /* BSPEC mistake? */
+#define GEN8_PPAT_AGE(x)	(x<<4)
+#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
+	uint64_t pat;
+
+	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
+	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
+	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
+	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
+	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
+	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
+	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
+	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+
+	I915_WRITE(GEN8_PRIVATE_PAT, pat);
+	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
+}
+
 static int gen8_gmch_probe(struct drm_device *dev,
 			   size_t *gtt_total,
 			   size_t *stolen,
@@ -1027,6 +1067,8 @@  static int gen8_gmch_probe(struct drm_device *dev,
 	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
 	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
 
+	gen8_setup_private_ppat(dev_priv);
+
 	ret = ggtt_probe_common(dev, gtt_size);
 
 	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b801b88..9929750 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -664,6 +664,7 @@ 
 #define   RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3)
 #define   RING_FAULT_VALID	(1<<0)
 #define DONE_REG		0x40b0
+#define GEN8_PRIVATE_PAT	0x40e0
 #define BSD_HWS_PGA_GEN7	(0x04180)
 #define BLT_HWS_PGA_GEN7	(0x04280)
 #define VEBOX_HWS_PGA_GEN7	(0x04380)