diff mbox

[20/62] drm/i915/bdw: Add GTT functions

Message ID 1383451680-11173-21-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Nov. 3, 2013, 4:07 a.m. UTC
With the PTE clarifications, the bind and clear functions can now be
added for gen8.

v2: Use for_each_sg_page in gen8_ggtt_insert_entries.

v3: Drop dev argument to pte encode functions, upstream lost it. Also
rebase on top of the scratch page movement.

v4: Rebase on top of the new address space vfuncs.

v5: Add the bool use_scratch argument to clear_range and the bool valid argument
to the PTE encode function to follow upstream changes.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 88 +++++++++++++++++++++++++++++++++++--
 1 file changed, 85 insertions(+), 3 deletions(-)

Comments

Imre Deak Nov. 4, 2013, 10:22 p.m. UTC | #1
On Sat, 2013-11-02 at 21:07 -0700, Ben Widawsky wrote:
> With the PTE clarifications, the bind and clear functions can now be
> added for gen8.
> 
> v2: Use for_each_sg_page in gen8_ggtt_insert_entries.
> 
> v3: Drop dev argument to pte encode functions, upstream lost it. Also
> rebase on top of the scratch page movement.
> 
> v4: Rebase on top of the new address space vfuncs.
> 
> v5: Add the bool use_scratch argument to clear_range and the bool valid argument
> to the PTE encode function to follow upstream changes.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Reviewed-by: Imre Deak <imre.deak@intel.com>

> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 88 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 85 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8bf2184..df992dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -58,6 +58,15 @@ typedef uint64_t gen8_gtt_pte_t;
>  #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
>  #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
>  
> +static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
> +					     enum i915_cache_level level,
> +					     bool valid)
> +{
> +	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
> +	pte |= addr;
> +	return pte;
> +}
> +
>  static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
>  				     bool valid)
> @@ -576,6 +585,56 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>  	return 0;
>  }
>  
> +static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
> +{
> +#ifdef writeq
> +	writeq(pte, addr);
> +#else
> +	iowrite32((u32)pte, addr);
> +	iowrite32(pte >> 32, addr + 4);
> +#endif
> +}
> +
> +static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> +				     struct sg_table *st,
> +				     unsigned int first_entry,
> +				     enum i915_cache_level level)
> +{
> +	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> +	gen8_gtt_pte_t __iomem *gtt_entries =
> +		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
> +	int i = 0;
> +	struct sg_page_iter sg_iter;
> +	dma_addr_t addr;
> +
> +	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
> +		addr = sg_dma_address(sg_iter.sg) +
> +			(sg_iter.sg_pgoffset << PAGE_SHIFT);
> +		gen8_set_pte(&gtt_entries[i],
> +			     gen8_pte_encode(addr, level, true));
> +		i++;
> +	}
> +
> +	/* XXX: This serves as a posting read to make sure that the PTE has
> +	 * actually been updated. There is some concern that even though
> +	 * registers and PTEs are within the same BAR that they are potentially
> +	 * of NUMA access patterns. Therefore, even with the way we assume
> +	 * hardware should work, we must keep this posting read for paranoia.
> +	 */
> +	if (i != 0)
> +		WARN_ON(readl(&gtt_entries[i-1])
> +			!= gen8_pte_encode(addr, level, true));
> +
> +#if 0 /* TODO: Still needed on GEN8? */
> +	/* This next bit makes the above posting read even more important. We
> +	 * want to flush the TLBs only after we're certain all the PTE updates
> +	 * have finished.
> +	 */
> +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> +	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> +#endif
> +}
> +
>  /*
>   * Binds an object into the global gtt with the specified cache level. The object
>   * will be accessible to the GPU via commands whose operands reference offsets
> @@ -618,6 +677,30 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
>  	POSTING_READ(GFX_FLSH_CNTL_GEN6);
>  }
>  
> +static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> +				  unsigned int first_entry,
> +				  unsigned int num_entries,
> +				  bool use_scratch)
> +{
> +	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> +	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
> +		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
> +	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
> +	int i;
> +
> +	if (WARN(num_entries > max_entries,
> +		 "First entry = %d; Num entries = %d (max=%d)\n",
> +		 first_entry, num_entries, max_entries))
> +		num_entries = max_entries;
> +
> +	scratch_pte = gen8_pte_encode(vm->scratch.addr,
> +				      I915_CACHE_LLC,
> +				      use_scratch);
> +	for (i = 0; i < num_entries; i++)
> +		gen8_set_pte(&gtt_base[i], scratch_pte);
> +	readl(gtt_base);
> +}
> +
>  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
>  				  unsigned int first_entry,
>  				  unsigned int num_entries,
> @@ -641,7 +724,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
>  	readl(gtt_base);
>  }
>  
> -
>  static void i915_ggtt_insert_entries(struct i915_address_space *vm,
>  				     struct sg_table *st,
>  				     unsigned int pg_start,
> @@ -947,8 +1029,8 @@ static int gen8_gmch_probe(struct drm_device *dev,
>  
>  	ret = ggtt_probe_common(dev, gtt_size);
>  
> -	dev_priv->gtt.base.clear_range = NULL;
> -	dev_priv->gtt.base.insert_entries = NULL;
> +	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
> +	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
>  
>  	return ret;
>  }
Bloomfield, Jon Nov. 6, 2013, 8:28 a.m. UTC | #2
> -----Original Message-----
> From: intel-gfx-bounces@lists.freedesktop.org [mailto:intel-gfx-
> bounces@lists.freedesktop.org] On Behalf Of Ben Widawsky
> Sent: Sunday, November 03, 2013 4:07 AM
> To: Intel GFX
> Cc: Daniel Vetter; Ben Widawsky; Widawsky, Benjamin
> Subject: [Intel-gfx] [PATCH 20/62] drm/i915/bdw: Add GTT functions
> 
> With the PTE clarifications, the bind and clear functions can now be added for
> gen8.
> 
> v2: Use for_each_sg_page in gen8_ggtt_insert_entries.
> 
> v3: Drop dev argument to pte encode functions, upstream lost it. Also rebase
> on top of the scratch page movement.
> 
> v4: Rebase on top of the new address space vfuncs.
> 
> v5: Add the bool use_scratch argument to clear_range and the bool valid
> argument to the PTE encode function to follow upstream changes.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 88
> +++++++++++++++++++++++++++++++++++--
>  1 file changed, 85 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
> b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8bf2184..df992dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -58,6 +58,15 @@ typedef uint64_t gen8_gtt_pte_t;
>  #define HSW_WB_ELLC_LLC_AGE0
> 	HSW_CACHEABILITY_CONTROL(0xb)
>  #define HSW_WT_ELLC_LLC_AGE0
> 	HSW_CACHEABILITY_CONTROL(0x6)
> 
> +static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
> +					     enum i915_cache_level level,
> +					     bool valid)
> +{
> +	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
> +	pte |= addr;
> +	return pte;
> +}
> +
>  static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
>  				     enum i915_cache_level level,
>  				     bool valid)
> @@ -576,6 +585,56 @@ int i915_gem_gtt_prepare_object(struct
> drm_i915_gem_object *obj)
>  	return 0;
>  }
> 
> +static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
> +{ #ifdef writeq
> +	writeq(pte, addr);
> +#else
> +	iowrite32((u32)pte, addr);
> +	iowrite32(pte >> 32, addr + 4);
> +#endif
> +}
> +
> +static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> +				     struct sg_table *st,
> +				     unsigned int first_entry,
> +				     enum i915_cache_level level)
> +{
> +	struct drm_i915_private *dev_priv = vm->dev->dev_private;
> +	gen8_gtt_pte_t __iomem *gtt_entries =
> +		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
> first_entry;
> +	int i = 0;
> +	struct sg_page_iter sg_iter;
> +	dma_addr_t addr;
> +
> +	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
> +		addr = sg_dma_address(sg_iter.sg) +
> +			(sg_iter.sg_pgoffset << PAGE_SHIFT);
> +		gen8_set_pte(&gtt_entries[i],
> +			     gen8_pte_encode(addr, level, true));
> +		i++;
> +	}
> +
> +	/* XXX: This serves as a posting read to make sure that the PTE has
> +	 * actually been updated. There is some concern that even though
> +	 * registers and PTEs are within the same BAR that they are
> potentially
> +	 * of NUMA access patterns. Therefore, even with the way we
> assume
> +	 * hardware should work, we must keep this posting read for
> paranoia.
> +	 */
> +	if (i != 0)
> +		WARN_ON(readl(&gtt_entries[i-1])
> +			!= gen8_pte_encode(addr, level, true));
Comparing a u32 with a 64-bit page-table entry? readl() returns only the low 32 bits, so this WARN_ON can never detect a mismatch in the upper half of the gen8 PTE.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8bf2184..df992dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -58,6 +58,15 @@  typedef uint64_t gen8_gtt_pte_t;
 #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
 #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
 
+static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
+					     enum i915_cache_level level,
+					     bool valid)
+{
+	gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
+	pte |= addr;
+	return pte;
+}
+
 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
 				     bool valid)
@@ -576,6 +585,56 @@  int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
+{
+#ifdef writeq
+	writeq(pte, addr);
+#else
+	iowrite32((u32)pte, addr);
+	iowrite32(pte >> 32, addr + 4);
+#endif
+}
+
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct sg_table *st,
+				     unsigned int first_entry,
+				     enum i915_cache_level level)
+{
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	gen8_gtt_pte_t __iomem *gtt_entries =
+		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
+	int i = 0;
+	struct sg_page_iter sg_iter;
+	dma_addr_t addr;
+
+	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+		addr = sg_dma_address(sg_iter.sg) +
+			(sg_iter.sg_pgoffset << PAGE_SHIFT);
+		gen8_set_pte(&gtt_entries[i],
+			     gen8_pte_encode(addr, level, true));
+		i++;
+	}
+
+	/* XXX: This serves as a posting read to make sure that the PTE has
+	 * actually been updated. There is some concern that even though
+	 * registers and PTEs are within the same BAR that they are potentially
+	 * of NUMA access patterns. Therefore, even with the way we assume
+	 * hardware should work, we must keep this posting read for paranoia.
+	 */
+	if (i != 0)
+		WARN_ON(readl(&gtt_entries[i-1])
+			!= gen8_pte_encode(addr, level, true));
+
+#if 0 /* TODO: Still needed on GEN8? */
+	/* This next bit makes the above posting read even more important. We
+	 * want to flush the TLBs only after we're certain all the PTE updates
+	 * have finished.
+	 */
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+#endif
+}
+
 /*
  * Binds an object into the global gtt with the specified cache level. The object
  * will be accessible to the GPU via commands whose operands reference offsets
@@ -618,6 +677,30 @@  static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
 
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+				  unsigned int first_entry,
+				  unsigned int num_entries,
+				  bool use_scratch)
+{
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
+		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
+	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	scratch_pte = gen8_pte_encode(vm->scratch.addr,
+				      I915_CACHE_LLC,
+				      use_scratch);
+	for (i = 0; i < num_entries; i++)
+		gen8_set_pte(&gtt_base[i], scratch_pte);
+	readl(gtt_base);
+}
+
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  unsigned int first_entry,
 				  unsigned int num_entries,
@@ -641,7 +724,6 @@  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 	readl(gtt_base);
 }
 
-
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int pg_start,
@@ -947,8 +1029,8 @@  static int gen8_gmch_probe(struct drm_device *dev,
 
 	ret = ggtt_probe_common(dev, gtt_size);
 
-	dev_priv->gtt.base.clear_range = NULL;
-	dev_priv->gtt.base.insert_entries = NULL;
+	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
+	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
 
 	return ret;
 }