Message ID | 1383451680-11173-21-git-send-email-benjamin.widawsky@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, 2013-11-02 at 21:07 -0700, Ben Widawsky wrote: > With the PTE clarifications, the bind and clear functions can now be > added for gen8. > > v2: Use for_each_sg_pages in gen8_ggtt_insert_entries. > > v3: Drop dev argument to pte encode functions, upstream lost it. Also > rebase on top of the scratch page movement. > > v4: Rebase on top of the new address space vfuncs. > > v5: Add the bool use_scratch argument to clear_range and the bool valid argument > to the PTE encode function to follow upstream changes. > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1) > Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Imre Deak <imre.deak@intel.com> > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 88 +++++++++++++++++++++++++++++++++++-- > 1 file changed, 85 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 8bf2184..df992dc 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -58,6 +58,15 @@ typedef uint64_t gen8_gtt_pte_t; > #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) > #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) > > +static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, > + enum i915_cache_level level, > + bool valid) > +{ > + gen8_gtt_pte_t pte = valid ? 
_PAGE_PRESENT | _PAGE_RW : 0; > + pte |= addr; > + return pte; > +} > + > static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, > enum i915_cache_level level, > bool valid) > @@ -576,6 +585,56 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) > return 0; > } > > +static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) > +{ > +#ifdef writeq > + writeq(pte, addr); > +#else > + iowrite32((u32)pte, addr); > + iowrite32(pte >> 32, addr + 4); > +#endif > +} > + > +static void gen8_ggtt_insert_entries(struct i915_address_space *vm, > + struct sg_table *st, > + unsigned int first_entry, > + enum i915_cache_level level) > +{ > + struct drm_i915_private *dev_priv = vm->dev->dev_private; > + gen8_gtt_pte_t __iomem *gtt_entries = > + (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; > + int i = 0; > + struct sg_page_iter sg_iter; > + dma_addr_t addr; > + > + for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { > + addr = sg_dma_address(sg_iter.sg) + > + (sg_iter.sg_pgoffset << PAGE_SHIFT); > + gen8_set_pte(&gtt_entries[i], > + gen8_pte_encode(addr, level, true)); > + i++; > + } > + > + /* XXX: This serves as a posting read to make sure that the PTE has > + * actually been updated. There is some concern that even though > + * registers and PTEs are within the same BAR that they are potentially > + * of NUMA access patterns. Therefore, even with the way we assume > + * hardware should work, we must keep this posting read for paranoia. > + */ > + if (i != 0) > + WARN_ON(readl(&gtt_entries[i-1]) > + != gen8_pte_encode(addr, level, true)); > + > +#if 0 /* TODO: Still needed on GEN8? */ > + /* This next bit makes the above posting read even more important. We > + * want to flush the TLBs only after we're certain all the PTE updates > + * have finished. > + */ > + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); > + POSTING_READ(GFX_FLSH_CNTL_GEN6); > +#endif > +} > + > /* > * Binds an object into the global gtt with the specified cache level. 
The object > * will be accessible to the GPU via commands whose operands reference offsets > @@ -618,6 +677,30 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, > POSTING_READ(GFX_FLSH_CNTL_GEN6); > } > > +static void gen8_ggtt_clear_range(struct i915_address_space *vm, > + unsigned int first_entry, > + unsigned int num_entries, > + bool use_scratch) > +{ > + struct drm_i915_private *dev_priv = vm->dev->dev_private; > + gen8_gtt_pte_t scratch_pte, __iomem *gtt_base = > + (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; > + const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; > + int i; > + > + if (WARN(num_entries > max_entries, > + "First entry = %d; Num entries = %d (max=%d)\n", > + first_entry, num_entries, max_entries)) > + num_entries = max_entries; > + > + scratch_pte = gen8_pte_encode(vm->scratch.addr, > + I915_CACHE_LLC, > + use_scratch); > + for (i = 0; i < num_entries; i++) > + gen8_set_pte(&gtt_base[i], scratch_pte); > + readl(gtt_base); > +} > + > static void gen6_ggtt_clear_range(struct i915_address_space *vm, > unsigned int first_entry, > unsigned int num_entries, > @@ -641,7 +724,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, > readl(gtt_base); > } > > - > static void i915_ggtt_insert_entries(struct i915_address_space *vm, > struct sg_table *st, > unsigned int pg_start, > @@ -947,8 +1029,8 @@ static int gen8_gmch_probe(struct drm_device *dev, > > ret = ggtt_probe_common(dev, gtt_size); > > - dev_priv->gtt.base.clear_range = NULL; > - dev_priv->gtt.base.insert_entries = NULL; > + dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; > + dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries; > > return ret; > }
> -----Original Message----- > From: intel-gfx-bounces@lists.freedesktop.org [mailto:intel-gfx- > bounces@lists.freedesktop.org] On Behalf Of Ben Widawsky > Sent: Sunday, November 03, 2013 4:07 AM > To: Intel GFX > Cc: Daniel Vetter; Ben Widawsky; Widawsky, Benjamin > Subject: [Intel-gfx] [PATCH 20/62] drm/i915/bdw: Add GTT functions > > With the PTE clarifications, the bind and clear functions can now be added for > gen8. > > v2: Use for_each_sg_pages in gen8_ggtt_insert_entries. > > v3: Drop dev argument to pte encode functions, upstream lost it. Also rebase > on top of the scratch page movement. > > v4: Rebase on top of the new address space vfuncs. > > v5: Add the bool use_scratch argument to clear_range and the bool valid > argument to the PTE encode function to follow upstream changes. > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1) > Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 88 > +++++++++++++++++++++++++++++++++++-- > 1 file changed, 85 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c > b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 8bf2184..df992dc 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -58,6 +58,15 @@ typedef uint64_t gen8_gtt_pte_t; > #define HSW_WB_ELLC_LLC_AGE0 > HSW_CACHEABILITY_CONTROL(0xb) > #define HSW_WT_ELLC_LLC_AGE0 > HSW_CACHEABILITY_CONTROL(0x6) > > +static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, > + enum i915_cache_level level, > + bool valid) > +{ > + gen8_gtt_pte_t pte = valid ? 
_PAGE_PRESENT | _PAGE_RW : 0; > + pte |= addr; > + return pte; > +} > + > static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, > enum i915_cache_level level, > bool valid) > @@ -576,6 +585,56 @@ int i915_gem_gtt_prepare_object(struct > drm_i915_gem_object *obj) > return 0; > } > > +static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) > +{ #ifdef writeq > + writeq(pte, addr); > +#else > + iowrite32((u32)pte, addr); > + iowrite32(pte >> 32, addr + 4); > +#endif > +} > + > +static void gen8_ggtt_insert_entries(struct i915_address_space *vm, > + struct sg_table *st, > + unsigned int first_entry, > + enum i915_cache_level level) > +{ > + struct drm_i915_private *dev_priv = vm->dev->dev_private; > + gen8_gtt_pte_t __iomem *gtt_entries = > + (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + > first_entry; > + int i = 0; > + struct sg_page_iter sg_iter; > + dma_addr_t addr; > + > + for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { > + addr = sg_dma_address(sg_iter.sg) + > + (sg_iter.sg_pgoffset << PAGE_SHIFT); > + gen8_set_pte(&gtt_entries[i], > + gen8_pte_encode(addr, level, true)); > + i++; > + } > + > + /* XXX: This serves as a posting read to make sure that the PTE has > + * actually been updated. There is some concern that even though > + * registers and PTEs are within the same BAR that they are > potentially > + * of NUMA access patterns. Therefore, even with the way we > assume > + * hardware should work, we must keep this posting read for > paranoia. > + */ > + if (i != 0) > + WARN_ON(readl(&gtt_entries[i-1]) > + != gen8_pte_encode(addr, level, true)); Comparing a u32 with a 64-bit page-table entry ?
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8bf2184..df992dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -58,6 +58,15 @@ typedef uint64_t gen8_gtt_pte_t; #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) +static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + bool valid) +{ + gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0; + pte |= addr; + return pte; +} + static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, enum i915_cache_level level, bool valid) @@ -576,6 +585,56 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) return 0; } +static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) +{ +#ifdef writeq + writeq(pte, addr); +#else + iowrite32((u32)pte, addr); + iowrite32(pte >> 32, addr + 4); +#endif +} + +static void gen8_ggtt_insert_entries(struct i915_address_space *vm, + struct sg_table *st, + unsigned int first_entry, + enum i915_cache_level level) +{ + struct drm_i915_private *dev_priv = vm->dev->dev_private; + gen8_gtt_pte_t __iomem *gtt_entries = + (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; + int i = 0; + struct sg_page_iter sg_iter; + dma_addr_t addr; + + for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { + addr = sg_dma_address(sg_iter.sg) + + (sg_iter.sg_pgoffset << PAGE_SHIFT); + gen8_set_pte(&gtt_entries[i], + gen8_pte_encode(addr, level, true)); + i++; + } + + /* XXX: This serves as a posting read to make sure that the PTE has + * actually been updated. There is some concern that even though + * registers and PTEs are within the same BAR that they are potentially + * of NUMA access patterns. Therefore, even with the way we assume + * hardware should work, we must keep this posting read for paranoia. 
+ */ + if (i != 0) + WARN_ON(readl(&gtt_entries[i-1]) + != gen8_pte_encode(addr, level, true)); + +#if 0 /* TODO: Still needed on GEN8? */ + /* This next bit makes the above posting read even more important. We + * want to flush the TLBs only after we're certain all the PTE updates + * have finished. + */ + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); +#endif +} + /* * Binds an object into the global gtt with the specified cache level. The object * will be accessible to the GPU via commands whose operands reference offsets @@ -618,6 +677,30 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, POSTING_READ(GFX_FLSH_CNTL_GEN6); } +static void gen8_ggtt_clear_range(struct i915_address_space *vm, + unsigned int first_entry, + unsigned int num_entries, + bool use_scratch) +{ + struct drm_i915_private *dev_priv = vm->dev->dev_private; + gen8_gtt_pte_t scratch_pte, __iomem *gtt_base = + (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; + const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + scratch_pte = gen8_pte_encode(vm->scratch.addr, + I915_CACHE_LLC, + use_scratch); + for (i = 0; i < num_entries; i++) + gen8_set_pte(&gtt_base[i], scratch_pte); + readl(gtt_base); +} + static void gen6_ggtt_clear_range(struct i915_address_space *vm, unsigned int first_entry, unsigned int num_entries, @@ -641,7 +724,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, readl(gtt_base); } - static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, unsigned int pg_start, @@ -947,8 +1029,8 @@ static int gen8_gmch_probe(struct drm_device *dev, ret = ggtt_probe_common(dev, gtt_size); - dev_priv->gtt.base.clear_range = NULL; - dev_priv->gtt.base.insert_entries = NULL; + 
dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; + dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries; return ret; }