diff mbox

[01/12] drm/i915: Add support for mapping an object page by page

Message ID 1461151059-16361-2-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

ankitprasad.r.sharma@intel.com April 20, 2016, 11:17 a.m. UTC
From: Chris Wilson <chris@chris-wilson.co.uk>

Introduced a new vm specfic callback insert_page() to program a single pte in
ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
space. This can be iterated over to access the whole object by using space as
meagre as page size.

v2: Added low level rpm assertions to insert_page routines (Chris)

v3: Added POSTING_READ post register write (Tvrtko)

v4: Rebase (Ankit)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/char/agp/intel-gtt.c        |  9 +++++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h |  5 +++
 include/drm/intel-gtt.h             |  3 ++
 4 files changed, 84 insertions(+)

Comments

Chris Wilson April 20, 2016, 12:04 p.m. UTC | #1
On Wed, Apr 20, 2016 at 04:47:28PM +0530, ankitprasad.r.sharma@intel.com wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Introduced a new vm specfic callback insert_page() to program a single pte in
> ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
> space. This can be iterated over to access the whole object by using space as
> meagre as page size.
> 
> v2: Added low level rpm assertions to insert_page routines (Chris)
> 
> v3: Added POSTING_READ post register write (Tvrtko)
> 
> v4: Rebase (Ankit)
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/char/agp/intel-gtt.c        |  9 +++++
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  5 +++
>  include/drm/intel-gtt.h             |  3 ++
>  4 files changed, 84 insertions(+)
> 
> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> index aef87fd..cea0ae3 100644
> --- a/drivers/char/agp/intel-gtt.c
> +++ b/drivers/char/agp/intel-gtt.c
> @@ -840,6 +840,15 @@ static bool i830_check_flags(unsigned int flags)
>  	return false;
>  }
>  
> +void intel_gtt_insert_page(dma_addr_t addr,
> +			   unsigned int pg,
> +			   unsigned int flags)
> +{
> +	intel_private.driver->write_entry(addr, pg, flags);
> +	wmb();
> +}
> +EXPORT_SYMBOL(intel_gtt_insert_page);
> +
>  void intel_gtt_insert_sg_entries(struct sg_table *st,
>  				 unsigned int pg_start,
>  				 unsigned int flags)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 9f165fe..da1819d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2353,6 +2353,29 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>  #endif
>  }
>  
> +static void gen8_ggtt_insert_page(struct i915_address_space *vm,
> +				  dma_addr_t addr,
> +				  uint64_t offset,
> +				  enum i915_cache_level level,
> +				  u32 unused)
> +{
> +	struct drm_i915_private *dev_priv = to_i915(vm->dev);
> +	gen8_pte_t __iomem *pte =
> +		(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
> +		(offset >> PAGE_SHIFT);
> +	int rpm_atomic_seq;
> +
> +	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
> +
> +	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
> +	wmb();
> +
> +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> +	POSTING_READ(GFX_FLSH_CNTL_GEN6);

Actually, we don't need these at all for the insert-page API (current
users in pread/pwrite/reloc/gpu-error-capture) as they go through the
GTT and not the GPU TLB. I would much rather we punt these to the caller
with a vm->flush() which we can add later. When I say later, we already
have the GUC pretending to be the GGTT and adding code where it doesn't
belong...

All we need here is the wmb() to order the PTE write (and flush the WCB)
with the use afterwards. The caller has to provide the wmb() to order
its writes before a subsequent PTE change.

In a few patches time we can also kill the rpm_atomic asserts and the
dev_priv local.
-Chris
ankitprasad.r.sharma@intel.com May 24, 2016, 8:17 a.m. UTC | #2
On Wed, 2016-04-20 at 13:04 +0100, Chris Wilson wrote:
> On Wed, Apr 20, 2016 at 04:47:28PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > From: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > Introduced a new vm specfic callback insert_page() to program a single pte in
> > ggtt or ppgtt. This allows us to map a single page in to the mappable aperture
> > space. This can be iterated over to access the whole object by using space as
> > meagre as page size.
> > 
> > v2: Added low level rpm assertions to insert_page routines (Chris)
> > 
> > v3: Added POSTING_READ post register write (Tvrtko)
> > 
> > v4: Rebase (Ankit)
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >  drivers/char/agp/intel-gtt.c        |  9 +++++
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +++++++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/i915_gem_gtt.h |  5 +++
> >  include/drm/intel-gtt.h             |  3 ++
> >  4 files changed, 84 insertions(+)
> > 
> > diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> > index aef87fd..cea0ae3 100644
> > --- a/drivers/char/agp/intel-gtt.c
> > +++ b/drivers/char/agp/intel-gtt.c
> > @@ -840,6 +840,15 @@ static bool i830_check_flags(unsigned int flags)
> >  	return false;
> >  }
> >  
> > +void intel_gtt_insert_page(dma_addr_t addr,
> > +			   unsigned int pg,
> > +			   unsigned int flags)
> > +{
> > +	intel_private.driver->write_entry(addr, pg, flags);
> > +	wmb();
> > +}
> > +EXPORT_SYMBOL(intel_gtt_insert_page);
> > +
> >  void intel_gtt_insert_sg_entries(struct sg_table *st,
> >  				 unsigned int pg_start,
> >  				 unsigned int flags)
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index 9f165fe..da1819d 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -2353,6 +2353,29 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
> >  #endif
> >  }
> >  
> > +static void gen8_ggtt_insert_page(struct i915_address_space *vm,
> > +				  dma_addr_t addr,
> > +				  uint64_t offset,
> > +				  enum i915_cache_level level,
> > +				  u32 unused)
> > +{
> > +	struct drm_i915_private *dev_priv = to_i915(vm->dev);
> > +	gen8_pte_t __iomem *pte =
> > +		(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
> > +		(offset >> PAGE_SHIFT);
> > +	int rpm_atomic_seq;
> > +
> > +	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
> > +
> > +	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
> > +	wmb();
> > +
> > +	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> > +	POSTING_READ(GFX_FLSH_CNTL_GEN6);
> 
> Actually, we don't need these at all for the insert-page API (current
> users in pread/pwrite/reloc/gpu-error-capture) as they go through the
> GTT and not the GPU TLB. I would much rather we punt these to the caller
> with a vm->flush() which we can add later. When I say later, we already
> have the GUC pretending to be the GGTT and adding code where it doesn't
> belong...

I was trying to test the stolen memory patches on SKL with the removed
FLSH_CNTL write. Apparently, the insert_page routine does not seem to be
working. As I was verifying if the stolen-backed object is zeroed out
(using i915_gem_object_clear, which makes use of insert_page) on
creation but it failed (I was getting a garbage value instead of zeros).
And on adding this write it works as expected.

I think this write cannot be removed after all.

Thanks,
Ankit
> 
> All we need here is the wmb() to order the PTE write (and flush the WCB)
> with the use afterwards. The caller has to provide the wmb() to order
> its writes before a subsequent PTE change.
> 
> In a few patches time we can also kill the rpm_atomic asserts and the
> dev_priv local.
> -Chris
>
diff mbox

Patch

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index aef87fd..cea0ae3 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -840,6 +840,15 @@  static bool i830_check_flags(unsigned int flags)
 	return false;
 }
 
+void intel_gtt_insert_page(dma_addr_t addr,
+			   unsigned int pg,
+			   unsigned int flags)
+{
+	intel_private.driver->write_entry(addr, pg, flags);
+	wmb();
+}
+EXPORT_SYMBOL(intel_gtt_insert_page);
+
 void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9f165fe..da1819d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2353,6 +2353,29 @@  static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 #endif
 }
 
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level level,
+				  u32 unused)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	gen8_pte_t __iomem *pte =
+		(gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
+		(offset >> PAGE_SHIFT);
+	int rpm_atomic_seq;
+
+	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
+
+	gen8_set_pte(pte, gen8_pte_encode(addr, level, true));
+	wmb();
+
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+
+	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+}
+
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     uint64_t start,
@@ -2425,6 +2448,29 @@  static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
 	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
 }
 
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level level,
+				  u32 flags)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	gen6_pte_t __iomem *pte =
+		(gen6_pte_t __iomem *)dev_priv->ggtt.gsm +
+		(offset >> PAGE_SHIFT);
+	int rpm_atomic_seq;
+
+	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
+
+	iowrite32(vm->pte_encode(addr, level, true, flags), pte);
+	wmb();
+
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+
+	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+}
+
 /*
  * Binds an object into the global gtt with the specified cache level. The object
  * will be accessible to the GPU via commands whose operands reference offsets
@@ -2539,6 +2585,24 @@  static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
 }
 
+static void i915_ggtt_insert_page(struct i915_address_space *vm,
+				  dma_addr_t addr,
+				  uint64_t offset,
+				  enum i915_cache_level cache_level,
+				  u32 unused)
+{
+	struct drm_i915_private *dev_priv = to_i915(vm->dev);
+	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+	int rpm_atomic_seq;
+
+	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
+
+	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+
+	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
+}
+
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *pages,
 				     uint64_t start,
@@ -3071,6 +3135,7 @@  static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	ret = ggtt_probe_common(dev, ggtt->size);
 
 	ggtt->base.clear_range = gen8_ggtt_clear_range;
+	ggtt->base.insert_page = gen8_ggtt_insert_page;
 	if (IS_CHERRYVIEW(dev_priv))
 		ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;
 	else
@@ -3109,6 +3174,7 @@  static int gen6_gmch_probe(struct i915_ggtt *ggtt)
 	ret = ggtt_probe_common(dev, ggtt->size);
 
 	ggtt->base.clear_range = gen6_ggtt_clear_range;
+	ggtt->base.insert_page = gen6_ggtt_insert_page;
 	ggtt->base.insert_entries = gen6_ggtt_insert_entries;
 	ggtt->base.bind_vma = ggtt_bind_vma;
 	ggtt->base.unbind_vma = ggtt_unbind_vma;
@@ -3140,6 +3206,7 @@  static int i915_gmch_probe(struct i915_ggtt *ggtt)
 		      &ggtt->mappable_base, &ggtt->mappable_end);
 
 	ggtt->do_idle_maps = needs_idle_maps(dev_priv->dev);
+	ggtt->base.insert_page = i915_ggtt_insert_page;
 	ggtt->base.insert_entries = i915_ggtt_insert_entries;
 	ggtt->base.clear_range = i915_ggtt_clear_range;
 	ggtt->base.bind_vma = ggtt_bind_vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index d7dd3d8..2982a6a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -316,6 +316,11 @@  struct i915_address_space {
 			    uint64_t start,
 			    uint64_t length,
 			    bool use_scratch);
+	void (*insert_page)(struct i915_address_space *vm,
+			    dma_addr_t addr,
+			    uint64_t offset,
+			    enum i915_cache_level cache_level,
+			    u32 flags);
 	void (*insert_entries)(struct i915_address_space *vm,
 			       struct sg_table *st,
 			       uint64_t start,
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 9e9bddaa5..f49edec 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -13,6 +13,9 @@  void intel_gmch_remove(void);
 bool intel_enable_gtt(void);
 
 void intel_gtt_chipset_flush(void);
+void intel_gtt_insert_page(dma_addr_t addr,
+			   unsigned int pg,
+			   unsigned int flags);
 void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags);