Message ID | 1461151059-16361-2-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Apr 20, 2016 at 04:47:28PM +0530, ankitprasad.r.sharma@intel.com wrote: > From: Chris Wilson <chris@chris-wilson.co.uk> > > Introduced a new vm specfic callback insert_page() to program a single pte in > ggtt or ppgtt. This allows us to map a single page in to the mappable aperture > space. This can be iterated over to access the whole object by using space as > meagre as page size. > > v2: Added low level rpm assertions to insert_page routines (Chris) > > v3: Added POSTING_READ post register write (Tvrtko) > > v4: Rebase (Ankit) > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/char/agp/intel-gtt.c | 9 +++++ > drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +++++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++ > include/drm/intel-gtt.h | 3 ++ > 4 files changed, 84 insertions(+) > > diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c > index aef87fd..cea0ae3 100644 > --- a/drivers/char/agp/intel-gtt.c > +++ b/drivers/char/agp/intel-gtt.c > @@ -840,6 +840,15 @@ static bool i830_check_flags(unsigned int flags) > return false; > } > > +void intel_gtt_insert_page(dma_addr_t addr, > + unsigned int pg, > + unsigned int flags) > +{ > + intel_private.driver->write_entry(addr, pg, flags); > + wmb(); > +} > +EXPORT_SYMBOL(intel_gtt_insert_page); > + > void intel_gtt_insert_sg_entries(struct sg_table *st, > unsigned int pg_start, > unsigned int flags) > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 9f165fe..da1819d 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -2353,6 +2353,29 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) > #endif > } > > +static void gen8_ggtt_insert_page(struct i915_address_space *vm, > + dma_addr_t addr, > + uint64_t offset, > + enum i915_cache_level level, > + u32 unused) > +{ > + struct drm_i915_private *dev_priv = to_i915(vm->dev); > + gen8_pte_t __iomem *pte = > + (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + > + (offset >> PAGE_SHIFT); > + int rpm_atomic_seq; > + > + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); > + > + gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); > + wmb(); > + > + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); > + POSTING_READ(GFX_FLSH_CNTL_GEN6); Actually, we don't need these at all for the insert-page API (current users in pread/pwrite/reloc/gpu-error-capture) as they go through the GTT and not the GPU TLB. I would much rather we punt these to the caller with a vm->flush() which we can add later. When I say later, we already have the GUC pretending to be the GGTT and adding code where it doesn't belong... All we need here is the wmb() to order the PTE write (and flush the WCB) with the use afterwards. The caller has to provide the wmb() to order its writes before a subsequent PTE change. In a few patches time we can also kill the rpm_atomic asserts and the dev_priv local. -Chris
On Wed, 2016-04-20 at 13:04 +0100, Chris Wilson wrote: > On Wed, Apr 20, 2016 at 04:47:28PM +0530, ankitprasad.r.sharma@intel.com wrote: > > From: Chris Wilson <chris@chris-wilson.co.uk> > > > > Introduced a new vm specfic callback insert_page() to program a single pte in > > ggtt or ppgtt. This allows us to map a single page in to the mappable aperture > > space. This can be iterated over to access the whole object by using space as > > meagre as page size. > > > > v2: Added low level rpm assertions to insert_page routines (Chris) > > > > v3: Added POSTING_READ post register write (Tvrtko) > > > > v4: Rebase (Ankit) > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > --- > > drivers/char/agp/intel-gtt.c | 9 +++++ > > drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +++++++++++++++++++++++++++++++++++++ > > drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++ > > include/drm/intel-gtt.h | 3 ++ > > 4 files changed, 84 insertions(+) > > > > diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c > > index aef87fd..cea0ae3 100644 > > --- a/drivers/char/agp/intel-gtt.c > > +++ b/drivers/char/agp/intel-gtt.c > > @@ -840,6 +840,15 @@ static bool i830_check_flags(unsigned int flags) > > return false; > > } > > > > +void intel_gtt_insert_page(dma_addr_t addr, > > + unsigned int pg, > > + unsigned int flags) > > +{ > > + intel_private.driver->write_entry(addr, pg, flags); > > + wmb(); > > +} > > +EXPORT_SYMBOL(intel_gtt_insert_page); > > + > > void intel_gtt_insert_sg_entries(struct sg_table *st, > > unsigned int pg_start, > > unsigned int flags) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > > index 9f165fe..da1819d 100644 > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > > @@ -2353,6 +2353,29 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) > > #endif > > } > > > > +static void gen8_ggtt_insert_page(struct i915_address_space *vm, > > + dma_addr_t addr, > > + uint64_t offset, > > + enum i915_cache_level level, > > + u32 unused) > > +{ > > + struct drm_i915_private *dev_priv = to_i915(vm->dev); > > + gen8_pte_t __iomem *pte = > > + (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + > > + (offset >> PAGE_SHIFT); > > + int rpm_atomic_seq; > > + > > + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); > > + > > + gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); > > + wmb(); > > + > > + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); > > + POSTING_READ(GFX_FLSH_CNTL_GEN6); > > Actually, we don't need these at all for the insert-page API (current > users in pread/pwrite/reloc/gpu-error-capture) as they go through the > GTT and not the GPU TLB. I would much rather we punt these to the caller > with a vm->flush() which we can add later. When I say later, we already > have the GUC pretending to be the GGTT and adding code where it doesn't > belong... I was trying to test the stolen memory patches on SKL with the removed FLSH_CNTL write. Apparently, the insert_page routine does not seem to be working. As I was verifying if the stolen-backed object is zeroed out (using i915_gem_object_clear, which makes use of insert_page) on creation but it failed (I was getting a garbage value instead of zeros). And on adding this write it works as expected. I think this write cannot be removed after all. Thanks, Ankit > > All we need here is the wmb() to order the PTE write (and flush the WCB) > with the use afterwards. The caller has to provide the wmb() to order > its writes before a subsequent PTE change. > > In a few patches time we can also kill the rpm_atomic asserts and the > dev_priv local. > -Chris >
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index aef87fd..cea0ae3 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -840,6 +840,15 @@ static bool i830_check_flags(unsigned int flags) return false; } +void intel_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags) +{ + intel_private.driver->write_entry(addr, pg, flags); + wmb(); +} +EXPORT_SYMBOL(intel_gtt_insert_page); + void intel_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9f165fe..da1819d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2353,6 +2353,29 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) #endif } +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); + wmb(); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, @@ -2425,6 +2448,29 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); } +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + iowrite32(vm->pte_encode(addr, level, true, flags), pte); + wmb(); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + /* * Binds an object into the global gtt with the specified cache level. The object * will be accessible to the GPU via commands whose operands reference offsets @@ -2539,6 +2585,24 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level cache_level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, @@ -3071,6 +3135,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ret = ggtt_probe_common(dev, ggtt->size); ggtt->base.clear_range = gen8_ggtt_clear_range; + ggtt->base.insert_page = gen8_ggtt_insert_page; if (IS_CHERRYVIEW(dev_priv)) ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; else @@ -3109,6 +3174,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ret = ggtt_probe_common(dev, ggtt->size); ggtt->base.clear_range = gen6_ggtt_clear_range; + ggtt->base.insert_page = gen6_ggtt_insert_page; ggtt->base.insert_entries = gen6_ggtt_insert_entries; ggtt->base.bind_vma = ggtt_bind_vma; ggtt->base.unbind_vma = ggtt_unbind_vma; @@ -3140,6 +3206,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) &ggtt->mappable_base, &ggtt->mappable_end); ggtt->do_idle_maps = needs_idle_maps(dev_priv->dev); + ggtt->base.insert_page = i915_ggtt_insert_page; ggtt->base.insert_entries = i915_ggtt_insert_entries; ggtt->base.clear_range = i915_ggtt_clear_range; ggtt->base.bind_vma = ggtt_bind_vma; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index d7dd3d8..2982a6a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -316,6 +316,11 @@ struct i915_address_space { uint64_t start, uint64_t length, bool use_scratch); + void (*insert_page)(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level cache_level, + u32 flags); void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, uint64_t start, diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 9e9bddaa5..f49edec 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -13,6 +13,9 @@ void intel_gmch_remove(void); bool intel_enable_gtt(void); void intel_gtt_chipset_flush(void); +void intel_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags); void intel_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags);