Message ID | 20190613151836.16093-1-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Implicit dev_priv removal and GT compartmentalization | expand |
Quoting Tvrtko Ursulin (2019-06-13 16:18:36) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Having introduced struct intel_gt (named the anonymous structure in i915) > we can start using it to compartmentalize our code better. It makes more > sense logically to have the code internally like this and it will also > help with future split between gt and display in i915. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > drivers/gpu/drm/i915/gem/i915_gem_object.c | 5 +-- > .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- > drivers/gpu/drm/i915/gt/intel_gt.c | 41 +++++++++++++++++++ > drivers/gpu/drm/i915/gt/intel_gt.h | 2 + > drivers/gpu/drm/i915/i915_drv.h | 2 - > drivers/gpu/drm/i915/i915_gem.c | 40 ------------------ > drivers/gpu/drm/i915/i915_vma.c | 3 +- > drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- > 8 files changed, 50 insertions(+), 48 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c > index 36b76c6a0a9d..9ae7743348f2 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c > @@ -27,6 +27,7 @@ > #include "i915_gem_context.h" > #include "i915_gem_object.h" > #include "i915_globals.h" > +#include "gt/intel_gt.h" g before i > #include "intel_frontbuffer.h" > > static struct i915_global_object { > @@ -367,7 +368,6 @@ void > i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, > unsigned int flush_domains) > { > - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); > struct i915_vma *vma; > > assert_object_held(obj); > @@ -377,8 +377,6 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, > > switch (obj->write_domain) { > case I915_GEM_DOMAIN_GTT: > - i915_gem_flush_ggtt_writes(dev_priv); > - > intel_fb_obj_flush(obj, > fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); > > @@ -386,6 +384,7 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, > if (vma->iomap) 
> continue; > > + intel_gt_flush_ggtt_writes(vma->vm->gt); > i915_vma_unset_ggtt_write(vma); > } > break; > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c > index b92809418729..b46d57967bfa 100644 > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c > @@ -6,6 +6,7 @@ > > #include <linux/prime_numbers.h> > > +#include "gt/intel_gt.h" > #include "gt/intel_gt_pm.h" > #include "huge_gem_object.h" > #include "i915_selftest.h" > @@ -143,7 +144,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, > if (offset >= obj->base.size) > continue; > > - i915_gem_flush_ggtt_writes(to_i915(obj->base.dev)); > + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); > > p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); > cpu = kmap(p) + offset_in_page(offset); > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c > index c6a67393ee72..7bf01365573a 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > @@ -188,3 +188,44 @@ void intel_gt_init_swizzling(struct intel_gt *gt) > else > MISSING_CASE(INTEL_GEN(i915)); > } > + > +void intel_gt_flush_ggtt_writes(struct intel_gt *gt) > +{ > + struct drm_i915_private *i915 = gt->i915; > + intel_wakeref_t wakeref; > + > + /* > + * No actual flushing is required for the GTT write domain for reads > + * from the GTT domain. Writes to it "immediately" go to main memory > + * as far as we know, so there's no chipset flush. It also doesn't > + * land in the GPU render cache. > + * > + * However, we do have to enforce the order so that all writes through > + * the GTT land before any writes to the device, such as updates to > + * the GATT itself. > + * > + * We also have to wait a bit for the writes to land from the GTT. > + * An uncached read (i.e. mmio) seems to be ideal for the round-trip > + * timing. 
This issue has only been observed when switching quickly > + * between GTT writes and CPU reads from inside the kernel on recent hw, > + * and it appears to only affect discrete GTT blocks (i.e. on LLC > + * system agents we cannot reproduce this behaviour, until Cannonlake > + * that was!). > + */ > + > + wmb(); > + > + if (INTEL_INFO(i915)->has_coherent_ggtt) > + return; > + > + i915_gem_chipset_flush(i915);
Another gt candidate iirc. It's a magic page in the chipset to force it to clear caches and pass a GOP.
-Chris
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 36b76c6a0a9d..9ae7743348f2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -27,6 +27,7 @@ #include "i915_gem_context.h" #include "i915_gem_object.h" #include "i915_globals.h" +#include "gt/intel_gt.h" #include "intel_frontbuffer.h" static struct i915_global_object { @@ -367,7 +368,6 @@ void i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; assert_object_held(obj); @@ -377,8 +377,6 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, switch (obj->write_domain) { case I915_GEM_DOMAIN_GTT: - i915_gem_flush_ggtt_writes(dev_priv); - intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); @@ -386,6 +384,7 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj, if (vma->iomap) continue; + intel_gt_flush_ggtt_writes(vma->vm->gt); i915_vma_unset_ggtt_write(vma); } break; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index b92809418729..b46d57967bfa 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -6,6 +6,7 @@ #include <linux/prime_numbers.h> +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "huge_gem_object.h" #include "i915_selftest.h" @@ -143,7 +144,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, if (offset >= obj->base.size) continue; - i915_gem_flush_ggtt_writes(to_i915(obj->base.dev)); + intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); cpu = kmap(p) + offset_in_page(offset); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 
c6a67393ee72..7bf01365573a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -188,3 +188,44 @@ void intel_gt_init_swizzling(struct intel_gt *gt) else MISSING_CASE(INTEL_GEN(i915)); } + +void intel_gt_flush_ggtt_writes(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + intel_wakeref_t wakeref; + + /* + * No actual flushing is required for the GTT write domain for reads + * from the GTT domain. Writes to it "immediately" go to main memory + * as far as we know, so there's no chipset flush. It also doesn't + * land in the GPU render cache. + * + * However, we do have to enforce the order so that all writes through + * the GTT land before any writes to the device, such as updates to + * the GATT itself. + * + * We also have to wait a bit for the writes to land from the GTT. + * An uncached read (i.e. mmio) seems to be ideal for the round-trip + * timing. This issue has only been observed when switching quickly + * between GTT writes and CPU reads from inside the kernel on recent hw, + * and it appears to only affect discrete GTT blocks (i.e. on LLC + * system agents we cannot reproduce this behaviour, until Cannonlake + * that was!). 
+ */ + + wmb(); + + if (INTEL_INFO(i915)->has_coherent_ggtt) + return; + + i915_gem_chipset_flush(i915); + + with_intel_runtime_pm(i915, wakeref) { + struct intel_uncore *uncore = gt->uncore; + + spin_lock_irq(&uncore->lock); + intel_uncore_posting_read_fw(uncore, + RING_HEAD(RENDER_RING_BASE)); + spin_unlock_irq(&uncore->lock); + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index e026b2dc1115..700bb56d1e9b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -20,4 +20,6 @@ void intel_gt_clear_error_registers(struct intel_gt *gt, void intel_gt_init_swizzling(struct intel_gt *gt); +void intel_gt_flush_ggtt_writes(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1eb203fdee60..4987a048b3d3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2648,8 +2648,6 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm); -void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv); - /* belongs in i915_gem_gtt.h */ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b7f88e2bd7df..4f9aac62a8a4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -233,46 +233,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, &args->size, &args->handle); } -void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) -{ - intel_wakeref_t wakeref; - - /* - * No actual flushing is required for the GTT write domain for reads - * from the GTT domain. Writes to it "immediately" go to main memory - * as far as we know, so there's no chipset flush. It also doesn't - * land in the GPU render cache. 
- * - * However, we do have to enforce the order so that all writes through - * the GTT land before any writes to the device, such as updates to - * the GATT itself. - * - * We also have to wait a bit for the writes to land from the GTT. - * An uncached read (i.e. mmio) seems to be ideal for the round-trip - * timing. This issue has only been observed when switching quickly - * between GTT writes and CPU reads from inside the kernel on recent hw, - * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour, until Cannonlake - * that was!). - */ - - wmb(); - - if (INTEL_INFO(dev_priv)->has_coherent_ggtt) - return; - - i915_gem_chipset_flush(dev_priv); - - with_intel_runtime_pm(dev_priv, wakeref) { - struct intel_uncore *uncore = &dev_priv->uncore; - - spin_lock_irq(&uncore->lock); - intel_uncore_posting_read_fw(uncore, - RING_HEAD(RENDER_RING_BASE)); - spin_unlock_irq(&uncore->lock); - } -} - static int shmem_pread(struct page *page, int offset, int len, char __user *user_data, bool needs_clflush) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index cb341e4acf99..4f4695c351af 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -23,6 +23,7 @@ */ #include "gt/intel_engine.h" +#include "gt/intel_gt.h" #include "i915_vma.h" @@ -408,7 +409,7 @@ void i915_vma_flush_writes(struct i915_vma *vma) if (!i915_vma_has_ggtt_write(vma)) return; - i915_gem_flush_ggtt_writes(vma->vm->i915); + intel_gt_flush_ggtt_writes(vma->vm->gt); i915_vma_unset_ggtt_write(vma); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 2093d08a7569..a67f0e9b4d5f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1195,7 +1195,7 @@ static int igt_ggtt_page(void *arg) iowrite32(n, vaddr + n); io_mapping_unmap_atomic(vaddr); } - 
i915_gem_flush_ggtt_writes(i915); + intel_gt_flush_ggtt_writes(ggtt->vm.gt); i915_random_reorder(order, count, &prng); for (n = 0; n < count; n++) {